From bfda7097d513944ad71ef964a947a9040bb0b3a1 Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Tue, 25 Oct 2022 13:07:17 +0200 Subject: [PATCH] Improvements to the compute graph documentation. Some compute graph nodes have been moved out of the GenericNodes.h header into their specific headers. --- ComputeGraph/FAQ.md | 323 ++++++++++++++++++ ComputeGraph/README.md | 25 +- ...ereoToMono.h => InterleavedStereoToMono.h} | 18 +- .../cg/static/nodes/cpp/OverlapAndAdd.h | 82 +++++ .../cg/static/nodes/cpp/SlidingBuffer.h | 56 +++ ComputeGraph/cg/static/src/GenericNodes.h | 73 ---- ComputeGraph/documentation/perf.png | Bin 0 -> 5148 bytes ComputeGraph/documentation/shared_buffer.png | Bin 0 -> 11760 bytes ComputeGraph/examples/example2/AppNodes.h | 1 + ComputeGraph/examples/example3/AppNodes.h | 2 + ComputeGraph/examples/example6/AppNodes.h | 1 + Include/arm_math.h | 8 +- ...eoToMono.py => InterleavedStereoToMono.py} | 6 +- cmsisdsp/cg/static/scheduler/standard.py | 4 +- 14 files changed, 500 insertions(+), 99 deletions(-) create mode 100644 ComputeGraph/FAQ.md rename ComputeGraph/cg/static/nodes/cpp/{StereoToMono.h => InterleavedStereoToMono.h} (74%) create mode 100644 ComputeGraph/cg/static/nodes/cpp/OverlapAndAdd.h create mode 100644 ComputeGraph/cg/static/nodes/cpp/SlidingBuffer.h create mode 100644 ComputeGraph/documentation/perf.png create mode 100644 ComputeGraph/documentation/shared_buffer.png rename cmsisdsp/cg/static/nodes/{StereoToMono.py => InterleavedStereoToMono.py} (90%) diff --git a/ComputeGraph/FAQ.md b/ComputeGraph/FAQ.md new file mode 100644 index 00000000..2c5aadee --- /dev/null +++ b/ComputeGraph/FAQ.md @@ -0,0 +1,323 @@ +# FAQ + +## Alignment + +When the `memoryOptimization` mode is enabled, the memory can be shared between different FIFOs (when the FIFOs are in fact used as simple arrays). + +In this case, the type of the memory buffers is `uint8_t`. And this memory can be used with FIFOs of different types (`float32_t`, `q31_t`). 
+ +So, it is important to align the buffers because the compiler may generate instructions which are requiring an aligned buffer thinking that the type of the buffer is different from `uint8_t`. + +This can be achieved by defining `CG_BEFORE_BUFFER` in the global custom header included by the scheduler. + +For instance: + +`#define CG_BEFORE_BUFFER __ALIGNED(4)` + +This is not needed when `memoryOptimization` is `False` in the Python configuration. + +The read buffer and write buffers used to interact with a FIFO have the alignment of the FIFO datatype but no more (assuming the underlying memory is aligned in case its type is `uint8_t`) + +If the number of samples read is `NR` and the number of samples written is `NW`, the alignments (in number of samples) may be: + +* `r0 . NR` (where `r0` is an integer with `r0 >= 0`) +* `w . NW - r1 . NR` (where `r1` and `w` are integers with `r1 >= 0` and `w >= 0`) + +If you need a stronger alignment, you'll need to choose `NR` and `NW` in the right way. + +For instance, if you need an alignment on a multiple of `16` bytes with a buffer containing `q31_t`, then `NW` and `NR` need to be multiples of `4`. + +If you can't choose freely the values of `NR` and `NW` then you may need to do a copy inside your component to align the buffer (of course only if the overhead due to the lack of alignment is bigger than doing a copy.) + +## Memory sharing + +When the `memoryOptimization` is enabled, the memory may be reused for different FIFOs to minimize the memory usage. But the scheduling algorithm is not trying to optimize this. So depending on how the graph was scheduled, the level of sharing may be different. + +If you need to share as much memory as possible (to minimize memory usage or the amount of memory copied), you can do it if you respect a fundamental constraint : the values in the FIFOs must have value semantics and not reference semantics. 
+ +If you share memory, you are using reference semantic and it should be hidden from the graph : one could use a copy-on-write strategy with a reference count inside the shared buffers. The reference count could also be computed statically using C++ templates if the graph has no loops: + + + +One could define an audio buffer data type : + +```c++ +template +struct SharedAudioBuf +{ + float32_t *buf; + static int getNbSamples() {return nbSamples;}; +}; + +template +using SharedBuf = struct SharedAudioBuf; + +``` + +The template tracks the number of samples and the reference count. + +The FIFOs no longer contain the float samples but only the shared buffers. + +In this example, instead of having a length of 128 `float` samples, a FIFO would have a length of one `SharedBuf<128,r>` sample. + +An example of compute graph could be: + +![shared_buffer](documentation/shared_buffer.png) + +The copy of a `SharedBuf` is copying a pointer to a buffer and not the buffer. It is reference semantic and the buffer should not be modified if the ref count is > 1. + +In the above graph, there is a processing node doing in-place modification of the buffer and it could have a template specialization defined as: + +```c++ +template +class ProcessingNode; + + +template +class ProcessingNode,1, + SharedBuf,1>: +public GenericNode,1, + SharedBuf,1> +``` + +The meaning is: + +* The input and output FIFOs have a length of 1 sample +* The sample has a type `SharedBuf` +* The reference count is statically known to be 1 so it is safe to do in place modifications of the buffer and the output buffer is a pointer to the input one + +In case of duplication, the template specialization could look like: + +```C++ +template +class DupNode; + +template +class DupNode,1, + SharedBuf,1, + SharedBuf,1>: +public GenericNode12,1, + SharedBuf,1, + SharedBuf,1> +``` + +If the input buffer has a reference count `REF`, the two output buffers will have a reference count of `REF+1`. 
The node is introducing some sharing. We statically know and compute the new reference count which will prevent other nodes in the graph from doing in place modifications of the buffer. + +If another node needs to modify the content of this buffer, it should allocate a new buffer. Such a node could have the following template specializations: + +```C++ +template +class ProcessingNode; + +template +class ProcessingNode,1, + SharedBuf,1>: +public GenericNode,1, + SharedBuf,1> + +template +class ProcessingNode,1, + SharedBuf,1>: +public GenericNode,1, + SharedBuf,1> +``` + +The first specialization is working with a reference count of 1 and doing in place modification. + +The second implementation is working with a reference count > 1 (and is selected if the first specialization cannot be used). In this second implementation, a new output buffer should be allocated. This new buffer would have a reference count of 1. + +The memory allocation can use a custom made memory allocator and so be acceptable in case of real-time: + +* The amount of memory to allocate during one iteration of the scheduling is constant and can be known at compile time. It depends on the level of sharing in the compute graph +* After each iteration of the schedule, this allocated memory can be released + +## Latencies + +The compute graph schedule is computed only based on the dataflow dependencies and with an attempt at minimizing the FIFO sizes. + +No other constraint is used. But in a real application, it is not enough. For instance, this kind of scheduling would be acceptable from a pure data flow point of view (ignoring FIFO sizes for this example): + +* source, source, sink, sink + +But from a latency point of view it would not be good and the following schedule should be preferred: + +* source, sink, source, sink + +The scheduling algorithm is thus using a topological sort of the compute graph to try to schedule the sinks as soon as possible in order to minimize the source to sink latency. 
This optimization is disabled when the graph has some loops because in this case there is no possible topological sort. + +But, even with topological sorting, there may be cases where the latency is unavoidable without changing the amount of samples read and written by the nodes. In a dynamic scheduler it is something which is more difficult to track. With a static scheduler you can see it in the generated schedule. + +As an example, if you use 10 ms audio blocks at 16 kHz you get 160 samples. + +Now, if you want to compute an FFT of 256 samples (smallest power of 2 > 160), then you'll need to receive 2 audio blocks before you can start to compute the FFT and generate output samples. + +So, in the generated schedule you'll see sequences like : + +* source, source ... sink + +which is showing there will be a latency issue. The connection between the sink and an audio DMA will require more buffers than with a schedule like source ... sink ... source ... sink. + +To solve this problem you need either to: + +* Use a different length for the audio blocks +* Use an FFT whose length is not a power of 2 + + + +## Performances + +The use of C++ templates is giving more visibility to the compiler: + +* The number of samples read and written on the FIFOs are statically known ; +* When a FIFO is used as a simple array : it is statically visible to the compiler +* The dataflow between the nodes is statically visible + +It enables the compiler to generate better code for the C++ wrappers and minimize the overhead. 
+ +As an example, let's look at the graph: + +![perf](documentation/perf.png) + +The full code for all the nodes is: + +```C++ +template +class Sink; + +template +class Sink: +public GenericSink +{ +public: + Sink(FIFOBase &src): + GenericSink(src){ + }; + + int run() + { + float32_t *b=this->getReadBuffer(); + for(int i=0;i +class Source; + +template +class Source: GenericSource +{ +public: + Source(FIFOBase &dst): + GenericSource(dst){}; + + int run(){ + float32_t *b=this->getWriteBuffer(); + for(int i=0;i +class ProcessingNode; + +template +class ProcessingNode: +public GenericNode +{ +public: + ProcessingNode(FIFOBase &src,FIFOBase &dst): + GenericNode(src,dst){}; + + int run(){ + float32_t *a=this->getReadBuffer(); + float32_t *b=this->getWriteBuffer(); + arm_offset_f32(a,0.5,b,inputSize); + + return(0); + }; + +}; +``` + +The `input` and `output` arrays, used in the sink / source, are defined as extern. The source is reading from `input` and the sink is writing to `output`. + +If we look at the asm code generated with `-Ofast` with armclang `AC6` and for one iteration of the schedule, we get: + +```txt +PUSH {r4-r6,lr} +MOVW r5,#0x220 +MOVW r1,#0x620 +MOVT r5,#0x3000 +MOV r4,r0 +MOVT r1,#0x3000 +MOV r0,r5 +MOV r2,#0x200 +BL __aeabi_memcpy4 ; 0x10000a94 +MOVW r6,#0x420 +MOV r0,r5 +MOVT r6,#0x3000 +MOVS r2,#0x80 +VMOV.F32 s0,#0.5 +MOV r1,r6 +BL arm_offset_f32 ; 0x10002cd0 +MOV r0,#0x942c +MOV r1,r6 +MOVT r0,#0x3000 +MOV r2,#0x200 +BL __aeabi_memcpy4 ; 0x10000a94 +MOVS r1,#0 +MOVS r0,#1 +STR r1,[r4,#0] +POP {r4-r6,pc} +``` + +It is the code you would get if you were manually writing a call to the corresponding CMSIS-DSP function. All the C++ templates have disappeared. The switch / case used to implement the scheduler has also been removed. + +The code was generated with `memoryOptimization` enabled and the Python script detected in this case that the FIFOs are used as arrays. As a consequence, there is no FIFO update code. They are used as normal arrays. 
+ +The generated code is as efficient as something manually coded. + +The sink and the sources have been replaced by a `memcpy`. The call to the CMSIS-DSP function is just loading the registers and branching to the CMSIS-DSP function. + +The input buffer `input` is at address `0x30000620`. + +The `output` buffer is at address `0x3000942c`. + +We can see in the code: + +```txt +MOVW r1,#0x620 +... +MOVT r1,#0x3000 +``` + +or + +``` +MOV r0,#0x942c +... +MOVT r0,#0x3000 +``` + +just before the `memcpy` + + + +It is not always as ideal as in this example. But it demonstrates that the use of C++ templates and a Python code generator is enabling a low overhead solution to the problem of streaming and compute graph. + diff --git a/ComputeGraph/README.md b/ComputeGraph/README.md index 9f670d12..4bcc1ba7 100644 --- a/ComputeGraph/README.md +++ b/ComputeGraph/README.md @@ -2,6 +2,10 @@ ## Introduction +Embedded systems are often used to implement streaming solutions : the software is processing and / or generating stream of samples. The software is made of components that have no concept of streams : they are working with buffers. As a consequence, implementing a streaming solution is forcing the developer to think about scheduling questions, FIFO sizing etc ... + +The CMSIS-DSP compute graph is a low overhead solution to this problem : it makes it easier to build streaming solutions by connecting components and computing a scheduling at build time. The use of C++ template also enables the compiler to have more information about the components for better code generation. + A dataflow graph is a representation of how compute blocks are connected to implement a streaming processing. Here is an example with 3 nodes: @@ -57,13 +61,13 @@ Without any scheduling tool for a dataflow graph, there is a problem of modulari With the CMSIS-DSP Compute Graph (CG) Tools you don't have to think about those details while you are still experimenting with your data processing pipeline. 
It makes it easier to experiment, add or remove blocks, change their parameters. -The tools will generate a schedule and the FIFOs. Even if you don't use this at the end for a final implementation, the information could be useful : is the schedule too long ? Are the FIFOs too big ? +The tools will generate a schedule and the FIFOs. Even if you don't use this at the end for a final implementation, the information could be useful : is the schedule too long ? Are the FIFOs too big ? Is there too much latency between the sources and the sinks ? Let's look at an (artificial) example: graph1 -Without a tool, the user would probably try to modify the sample values so that the number of sample produced is equal to the number of samples consumed. With the CG Tools we know that such a graph can be scheduled and that the FIFO sizes need to be 11 and 5. +Without a tool, the user would probably try to modify the number of samples so that the number of sample produced is equal to the number of samples consumed. With the CG Tools we know that such a graph can be scheduled and that the FIFO sizes need to be 11 and 5. The periodic schedule generated for this graph has a length of 19. It is big for such a small graph and it is because, indeed 5 and 7 are not very well chosen values. But, it is working even with those values. @@ -97,15 +101,15 @@ First, you must install the `CMSIS-DSP` PythonWrapper: pip install cmsisdsp ``` -The script inside the cmsisdsp wrapper can be used to describe and generate the schedule. +The functions and classes inside the cmsisdsp wrapper can be used to describe and generate the schedule. -You can create a `graph.py` and include : +To start, you can create a `graph.py` file and include : ```python from cmsisdsp.cg.static.scheduler import * ``` -You can describe new type of blocks that you need in the compute graph if they are not provided by the python package by default. 
+In this file, you can describe new type of blocks that you need in the compute graph if they are not provided by the python package by default. Finally, you can execute `graph.py` to generate the C++ files. @@ -113,7 +117,7 @@ The generated files need to include the `ComputeGraph/cg/static/src/GenericNodes If you have declared new nodes in `graph.py` then you'll need to provide an implementation. -More details and explanations can be found in the documentation for the examples. The first example is giving all the details about the Python and C++ sides of the tool: +More details and explanations can be found in the documentation for the examples. The first example is a deep dive giving all the details about the Python and C++ sides of the tool: * [Example 1 : how to describe a simple graph](documentation/example1.md) * [Example 2 : More complex example with delay and CMSIS-DSP](documentation/example2.md) @@ -126,6 +130,10 @@ Example 7 is communicating with OpenModelica. The Modelica model (PythonTest) in Example 8 is showing how to define a new custom datatype for the IOs of the nodes. Example 8 is also demonstrating a new feature where an IO can be connected up to 3 inputs and the static scheduler will automatically generate duplicate nodes. +## Frequently asked questions: + +There is a [FAQ](FAQ.md) document. + ## Cyclo static scheduling Beginning with the version 1.7.0 of the Python wrapper and version >= 1.12 of CMSIS-DSP, cyclo static scheduling has been added. @@ -147,7 +155,7 @@ In the second case, we have the flexibility but it is no more synchronous becaus But we can observe that even if is is no more stationary, it is periodic. After consuming 160 samples the behavior should repeat. -One can use the resampler in the [SpeexDSP](https://gitlab.xiph.org/xiph/speexdsp) project to test. If we decide to consume only 40 samples in input to have less latency, then the resample of SpeexDSP will produce 37,37,37 and 36 samples for the first 4 executions. 
+One can use the resampler in the [SpeexDSP](https://gitlab.xiph.org/xiph/speexdsp) project to test. If we decide to consume only 40 samples in input to have less latency, then the resampler of SpeexDSP will produce 37,37,37 and 36 samples for the first 4 executions. And (40+40+40+40)/(37+37+37+36) = 160 / 147. @@ -370,9 +378,6 @@ It is a first version and there are lots of limitations and probably bugs: - Some checks are missing : for instance you can connect several nodes to the same io port. And io port must be connected to only one other io port. It is not checked by the script. - The code is requiring a lot more comments and cleaning - A C version of the code generator is missing -- The scheduling algorithm could provide different heuristics: - - Optimizing code size - - Optimizing memory usage - The code generation could provide more flexibility for memory allocation with a choice between: - Global - Stack diff --git a/ComputeGraph/cg/static/nodes/cpp/StereoToMono.h b/ComputeGraph/cg/static/nodes/cpp/InterleavedStereoToMono.h similarity index 74% rename from ComputeGraph/cg/static/nodes/cpp/StereoToMono.h rename to ComputeGraph/cg/static/nodes/cpp/InterleavedStereoToMono.h index e85aa54a..9d3a4726 100644 --- a/ComputeGraph/cg/static/nodes/cpp/StereoToMono.h +++ b/ComputeGraph/cg/static/nodes/cpp/InterleavedStereoToMono.h @@ -1,7 +1,7 @@ /* ---------------------------------------------------------------------- * Project: CMSIS DSP Library - * Title: StereoToMonoQ15.h - * Description: Stereo to mno stream in Q15 + * Title: InterleavedStereoToMono.h + * Description: Interleaved Stereo to mono stream in Q15 * * $Date: 06 August 2021 * $Revision: V1.10.0 @@ -31,13 +31,13 @@ template -class StereoToMono; +class InterleavedStereoToMono; template -class StereoToMono: public GenericNode +class InterleavedStereoToMono: public GenericNode { public: - StereoToMono(FIFOBase &src,FIFOBase &dst): + InterleavedStereoToMono(FIFOBase &src,FIFOBase &dst): 
GenericNode(src,dst){}; @@ -54,10 +54,10 @@ public: }; template -class StereoToMono: public GenericNode +class InterleavedStereoToMono: public GenericNode { public: - StereoToMono(FIFOBase &src,FIFOBase &dst): + InterleavedStereoToMono(FIFOBase &src,FIFOBase &dst): GenericNode(src,dst){}; @@ -74,10 +74,10 @@ public: }; template -class StereoToMono: public GenericNode +class InterleavedStereoToMono: public GenericNode { public: - StereoToMono(FIFOBase &src,FIFOBase &dst): + InterleavedStereoToMono(FIFOBase &src,FIFOBase &dst): GenericNode(src,dst){}; diff --git a/ComputeGraph/cg/static/nodes/cpp/OverlapAndAdd.h b/ComputeGraph/cg/static/nodes/cpp/OverlapAndAdd.h new file mode 100644 index 00000000..19a9b3b5 --- /dev/null +++ b/ComputeGraph/cg/static/nodes/cpp/OverlapAndAdd.h @@ -0,0 +1,82 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: OverlapAndAdd.h + * Description: Overlap And Add + * + * $Date: 25 October 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _OVERLAPANDADD_H_ +#define _OVERLAPANDADD_H_ + + +template +class OverlapAdd: public GenericNode +{ +public: + OverlapAdd(FIFOBase &src,FIFOBase &dst):GenericNode(src,dst) + { + static_assert((windowSize-overlap)>0, "Overlap is too big"); + memory.resize(overlap); + }; + + int run(){ + int i; + IN *a=this->getReadBuffer(); + IN *b=this->getWriteBuffer(); + + for(i=0;i 0) + { + + memcpy((void*)b,(void*)memory.data(),(windowSize-overlap)*sizeof(IN)); + + memmove(memory.data(),memory.data()+windowSize-overlap,(2*overlap - windowSize)*sizeof(IN)); + memcpy(memory.data()+2*overlap - windowSize,a+overlap,(windowSize-overlap)*sizeof(IN)); + } + else if (2*overlap - windowSize < 0) + { + memcpy((void*)b,(void*)memory.data(),overlap*sizeof(IN)); + memcpy((void*)(b+overlap),(void*)(a+overlap),(windowSize - 2*overlap)*sizeof(IN)); + + memcpy((void*)memory.data(),(void*)(a+windowSize-overlap),overlap*sizeof(IN)); + } + else + { + memcpy((void*)b,(void*)memory.data(),overlap*sizeof(IN)); + + memcpy((void*)memory.data(),(void*)(a+overlap),overlap*sizeof(IN)); + } + + return(0); + }; +protected: + std::vector memory; + +}; + +#endif \ No newline at end of file diff --git a/ComputeGraph/cg/static/nodes/cpp/SlidingBuffer.h b/ComputeGraph/cg/static/nodes/cpp/SlidingBuffer.h new file mode 100644 index 00000000..a9c1c338 --- /dev/null +++ b/ComputeGraph/cg/static/nodes/cpp/SlidingBuffer.h @@ -0,0 +1,56 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: SlidingBuffer.h + * Description: Sliding buffer + * + * $Date: 25 October 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _SLIDINGBUFFER_H_ +#define _SLIDINGBUFFER_H_ + + +template +class SlidingBuffer: public GenericNode +{ +public: + SlidingBuffer(FIFOBase &src,FIFOBase &dst):GenericNode(src,dst) + { + static_assert((windowSize-overlap)>0, "Overlap is too big"); + memory.resize(overlap); + }; + + int run(){ + IN *a=this->getReadBuffer(); + IN *b=this->getWriteBuffer(); + memcpy((void*)b,(void*)memory.data(),overlap*sizeof(IN)); + memcpy((void*)(b+overlap),(void*)a,(windowSize-overlap)*sizeof(IN)); + memcpy((void*)memory.data(),(void*)(b+windowSize-overlap),overlap*sizeof(IN)) ; + return(0); + }; +protected: + std::vector memory; + +}; + +#endif \ No newline at end of file diff --git a/ComputeGraph/cg/static/src/GenericNodes.h b/ComputeGraph/cg/static/src/GenericNodes.h index be7fd153..ff28f74c 100644 --- a/ComputeGraph/cg/static/src/GenericNodes.h +++ b/ComputeGraph/cg/static/src/GenericNodes.h @@ -242,79 +242,6 @@ private: #define REPEAT(N) for(int i=0;i -class SlidingBuffer: public GenericNode -{ -public: - SlidingBuffer(FIFOBase &src,FIFOBase &dst):GenericNode(src,dst) - { - static_assert((windowSize-overlap)>0, "Overlap is too big"); - memory.resize(overlap); - }; - - int run(){ - IN *a=this->getReadBuffer(); - IN *b=this->getWriteBuffer(); - memcpy((void*)b,(void*)memory.data(),overlap*sizeof(IN)); - memcpy((void*)(b+overlap),(void*)a,(windowSize-overlap)*sizeof(IN)); - 
memcpy((void*)memory.data(),(void*)(b+windowSize-overlap),overlap*sizeof(IN)) ; - return(0); - }; -protected: - std::vector memory; - -}; - -template -class OverlapAdd: public GenericNode -{ -public: - OverlapAdd(FIFOBase &src,FIFOBase &dst):GenericNode(src,dst) - { - static_assert((windowSize-overlap)>0, "Overlap is too big"); - memory.resize(overlap); - }; - - int run(){ - int i; - IN *a=this->getReadBuffer(); - IN *b=this->getWriteBuffer(); - - for(i=0;i 0) - { - - memcpy((void*)b,(void*)memory.data(),(windowSize-overlap)*sizeof(IN)); - - memmove(memory.data(),memory.data()+windowSize-overlap,(2*overlap - windowSize)*sizeof(IN)); - memcpy(memory.data()+2*overlap - windowSize,a+overlap,(windowSize-overlap)*sizeof(IN)); - } - else if (2*overlap - windowSize < 0) - { - memcpy((void*)b,(void*)memory.data(),overlap*sizeof(IN)); - memcpy((void*)(b+overlap),(void*)(a+overlap),(windowSize - 2*overlap)*sizeof(IN)); - - memcpy((void*)memory.data(),(void*)(a+windowSize-overlap),overlap*sizeof(IN)); - } - else - { - memcpy((void*)b,(void*)memory.data(),overlap*sizeof(IN)); - - memcpy((void*)memory.data(),(void*)(a+overlap),overlap*sizeof(IN)); - } - - return(0); - }; -protected: - std::vector memory; - -}; template class Duplicate2; diff --git a/ComputeGraph/documentation/perf.png b/ComputeGraph/documentation/perf.png new file mode 100644 index 0000000000000000000000000000000000000000..1b08daeb42e984ccc9cea48b3f69094c3c6d039b GIT binary patch literal 5148 zcmbVQc{o)6+nyF{$QpxWnJ5ZbLTJL+nXyH(W#2N{m$6m0h(efTC;LveVJL&_WM`-> zLyc@rLzdxpe1GpB?|Z$!>w2&2z2=&8&Ybg^&*wSke$MmU_Z@2h*JPyUrU!vQjM`c- zBM|7UHgK#sGi*(3ku`6T=sm8)IjDz|vbCFhom7j9AC!88m#|ZqUP2H&eIBTyFgfgU8GCy|205g1u2eCzd^k0jwZ!=aPr)dX#_;6at>mJjB1vg!HefhaDyNtC?-lKSRyuYf zIN#o6&ZC2z%OKE7d6;+q}-$En0J_pulTmE#txiQr0AlR}kkY&iJ9(_X~k^w~I z&QI1UlvGrFX;jMYXEeuh_YD8k^fZN9+Pk8<`X#6iYB2Wm!%h7J4K)){j&;m~8}uN- zoDmCTaB#4ly*=(gf}$mtip471zAL&p`1rP1-E0LYcj#jE;uLbZfX- znX04yw&?oq?k=hyT_Yd7TLF9@pP~ojCC4*Q$QrVJ_QzLR9x=lx#YyZeUMAMVI%T3< 
za?m?e1k+j4ep|0oefH9(Y>1rPxQqw#r|(?R;L`a72&;_OgtV%`G)$itztPOCUCV{E zu(XV{<$7Rktx-SwSTUq}(#_VxV~7}?dJGw}3LXAA_4szB_?T&VR7;Bj{%xr^lZbbS zg@3I-k*KWIOjuZ1c|VlH>pbFApO*2f-#-~%>@hv$#Fnl{U5v=tyzXI*8(AZPcGna@ zhYPEq?MXA6*&}@fLg-CjjhK?EiTU}b1cIY`V1a45DP zSr?hPicu66=_9Q_j;S&c&3{-K=X=NFr5fe8R`GbFJNQZM19nx~+LjjAb?=Q-`LIO= zl0l($wS{ASef=_b&)2WnjW1F5_G+M-ehj)98z~W4GiFw50t5aAfP2V?9MK#q`2*>z zOLM&171q)(z-8Xi*85lln|rVh!Q=G)3nl4Ze~VO$wT6^{bh$F6^|@`;yxbv+YKCr4 z9*~8wR)nTt0?Q<#(1-aon zw{|xje0+@Y20!C%B)ozdZ3KDd9^Wn}3vt-+wtmv=)?!t)`HE?|jFeixFAxbKU-^gQl^uVUUq z+^`L_%P`06IS?;2j2E=M^ZCj}h_ZucV3K<^Dvxtkcn4Up z-PgT|;(2@&nj#W zZvyVmA76gg#5iA9*FJg+q^R(9c6!j$$;tZz2=>+%jf9XryIPkP*$n;p)_{>ljD2u0 zJ}ul^T??xC1x5F*MM_;%vWY{CSO#XjxI)uQb0uVSvf2#$UA<9TDX%J4VgabK4pQK^ zMX^y)Q6D>h@pLShZ_j$r{6d7hktVE@QFv-Q@5#8#YXG|Vo*P+W_A3zhqi||0k}4Bc z6`!<6kS6xilY_`>jnDIWyw<7FZ=5ETWb}<&<5Zf62(%or>=j%LzGBIm8~VIzdYb^V zR+vSkovf}F-E5PumIrq)-Zr-;&NFF?q?5}y+j@5;+2oXk&im=}ayhhC_Inh3sj(VR zG8_0b^m-85WQ&ks=+1cIgKK}7bCtkr>Cc;uI@RH95S+fuEez5*433xOiXMF zHgN52R#x{=h8&QT;jpym5<|0!E?Y7^_-={&t_aiyi{g0AryO^K5eEY5x#;ItmvKa* z=;lHzr&}A)zNV|JaJBZIRTHNBa4{Emx;bsWH%wdOES_Ma@J&w&zPP~N71bo^7G<5# z(Ip#B%eJ<*{$0&@lBfldymjkYb;t2sBX>&RvcUu$m!F31aqO#|ogIU3p&=o>dt2Ww z-n=V!Oxd#m9r}3jDFr@_xp-yiH(?;~gtE6Y0zLWO70bp_?71C3cl1_XvJ6GdWo1R0 zwh1>s2|u9(gzOX0Ktm2B4auDW0Ugos>c%}4q3d?hp-a5AJ}bQ}>mAHzN3xY8Y`LzX z2LczPI7eM%w)eh;FQ=MW$4aI;%;aW+m90@fuj0w$#fUSYj-%ffK;L&a@PTVt0ow=b z)iK$P5$6-$zgRRQyLb^Nu1~nhaz#JmvTPDAVOTkwCv%N|5{dQS(Y@Ni4J4MhN2 zpZe{L?%`o3K4nzo$lb$=;5N9r}>Ng;}|ChP!#}LGSi6s_TLFnKSAnYWm-c_ zzsk~Jx^#2kj-c6k;;WG3{k4)wx4-0R8!tjX$I({!$+{Tt{cHx@=y7r%1Ryl#BFYBh z=*kcu<6ju1-GR=|vrf*=hN7uT`^DJ;*6Hc#pCV{3ZvWk}2i9!Pj+%~@#BZ30PTs#H zSbEaln9u&ze=|jswnQkb+3vqCQp#0&})^FMVMkFf|tT? 
zZZ>UoobY`-j_pW>3ECD-5{ryedul@WX@(qB-t>N8mB_f#hY8c)FDpi)x6j6R4 zFmbR|enrQ3tI$y}LkE~uuEX;LEF(NK5R6>+?QE$(zOZxj!NYt6+j~z$!I-k2#IX>& z<@8?;#XDYF*gUJhT3F zWJ4iVW>|0hLp*EP2|Gnp3tFp z$-S_we)pzq$eNG$-@)?QvFgoOosnNLv#mUS`ZbY;)P^7M*bgIha`!jRY>6b$%xRsS zYdwr|${njxb>O2%ueX?)-qQn>}EX%k_>mP>o#$k2;Qm&=j4A!+6Q@beT7 z1rZ(R3SZ2(*Jm%;h4+d9N=4&!m7)z=&bvfUj$u&~>W4ztkO#xwVh;zbhsoOC(b4JcR^+)RkMaTF5{VNZexm}I%Ts#Y!flH<>0!*JK84mt7 zHkQn>ySXX8y1IG;8l`#$RD;2|?R()9WHN@RldHm5&KhuD0Qa=>(7jekQ4tCH{nAX~ zf~qM44$x<+f=M5<6ax$@O_TZ@D%JozVvI^7Pn9Z(L}FHb1PH6floqxk*Zc=JmJZ14 z9(!%88G~$V{rn;{+9M#K@ALnVqRn5})fIm8%=O)a2M;{SY&w)ZN4wa~OQrZ5K}XJ1 zNF?%MS}7(y;~-U3#Gs6HHm=(a|F-aGDaGcI_ixDYVg7jK(>J{wSAVyMXi(l6=a_{P zTN&-9u$)hjVc(^C^iii7U60Yv0?oS?;NVi1UJA%h^S;B*a^lW*N0nIMM4LB1N#(ZD{{^D0Evd+a+Lhx-+Bc+xt~O~ ztl~{&m?YlbUmC7ePu~XKJi}}36@z_F^>fHoU|v#j#;AGqG`>f}5sfk4DPa?xlz-3a zs~dxP>2)UySq>YMTyGS5lBi)M@@$8j`llY(Y08iFy|GxXMj!&fKZKpfa;}&sNeRZZ z`;wl|Z?U;10ad*ot%0mU;h%$&dJ$hn|#KP9$5VGrNHVN8K5ij0h_#S z4&lXB#!bO@0{kQPfcxc-aa!qHK*lF4QE6A2;*x|kEAq}vQYK1`vmI`UGpPO>&3GyK z9~--;Z@R&&tJ;M|&EY3^8yg!tUx-Tq=0Pn$xB&I76Ji4U!ln+6k5i)R1Dpb3Vc|Ij z2uMEcNFKoUd@1pD4VmGd6&p>bctkTXy=Fc9n#S17440{?spoP}Le-l~7eyN$GV1E; zJ}@``G&3{vaoOk4n}3nzJ~;ll64emEBmm_~XcS_0YvA7eid_#n8!!=B=?o&ilV?^3 z14;;Hc%pVI`6)&Ey{Va**Vb2TO+G^^kh$*DRa9Zyh3x0E?r}v^iUrLSOQ2zNyPCWM zk7fwhU4X$mzXv3iSBEmD6bxD{nGo4Zp9*!+`S5IK>O$wIfm0+yA*3U-oU(N9!B$V+ zs9k5fBlqW>A%3oj;FkNo5@+wJb1-<_jId2B&IR-E;Py#tUVw@LD6I%_<0UpiS<8NGymDUIbTQhvC^Um!&i9kmr~Gg2oaLVRoBqqsHl+5 z3^f>Gmea^vV9Gx}yq!Fpt-P+tQqbG?MkGC;3ovL*Cbaq0sL2dpZi)H{zBt%Ad}?v( v3nkh{{(qdK{~2Bkh+(gN literal 0 HcmV?d00001 diff --git a/ComputeGraph/documentation/shared_buffer.png b/ComputeGraph/documentation/shared_buffer.png new file mode 100644 index 0000000000000000000000000000000000000000..a83462e8781d4271e5b131c5e7c540a3b66c5c16 GIT binary patch literal 11760 zcmb_?1yqz@`|XGb2q;}jh@?`|h{J#i4Bah)(hU;A(1?QMNJ%qvkBD@G2uMpyhvd-R za1Vap?;q>lb?>@&-NRbUJFIt@bI$vm=h=HdJ4i(dehdE&J_G`}B`YJT27%y^gX`CL 
zSm1YY@q{&a!7-ADOF}NMep2glq9Bla5LwBm8g5DJlRg%Am(Fpw;O9*hAM6~XhO>W{ zUFuM(3psSWDm#TMu1k!6b!z)c_oQH?DOw}oPLiHa=F|Jwfo48G!*uZvZ0wnGLu}Z- zc2|^#w&Db$)n0M%WnfojXxOQJN_g8p79RSxZj%9Zyt3d86-`#^Pd@V|dB-^^cDU?( z*f$C9B|@dM6M_l;9gRP2%f9^c22x2C3IFHKEBtn*f36>Wm?!z?I_}vH-apq0j0o(1 zuCqTuuU7wesLRk_`}d{q17_sEFCmSCbXi|!c8?#b5{;;lXANsY*t;7W8(TuCtX~qa z6Wk}dy3tpglZS@^>h^WGj9t;k=kfyqRUwa|e?Iao`*KZ#1ZJ0elh*Ckjc_twObP$^9q^ym z*eeyLRkz+eSDb0Q&f$;*8N}D4M@VqTzR=fSTV37VSd46+UtZ4bUAjw0H~IUw!`FGe z_60|Eb@g{Qva_@CKRg@4Tvxe?2jkW#n8{#QP^IOe(9cg`5S|MqVC?ZtWR!@(zSiM% zKSG9?l!Qlpfoa!TE{NJ#US2-y%Yqbm5jsHH21(whCgf{<*EEB{Siw+BFLIU&v}pDh z36b#@0;0mg=5>ujLz=BRtWm1uEc8hENM-{Vs+Wy4sH*Dmd1SnTbX}}K(U-zPnaCg1 zQOpnMk>l*s={7=l32_EiN>fWU9=hAK@GYjd>HL^@#`((R zPtcw1TQBB!k3WC@jDw4dF5(juq`?2MxVV_&eX^3B&062`EN!dxu00_U5r6CQP;0C7 z81^((g1A%FL=1=a;?k0F@$X{ewvC$2`u28NFV#RIhQ$}X3y#G)oaFejkpkd24Gb6; zk9Br-;u6twx?KJq7#U&fbbJ_tSzew|KJhg45iKPnmx?g0Dk|ckCQN}MBHwDKspuFO zz#QlCj5%pXP&BDD0pMR$D`1_qtKA$&YVqKZS*b zg;4SEwVs@w4sDJ(kOg?c3?ES^#mA?KuRM}xCI!d%#fulVwzdi9mUW&_CmpNw)zt?u znEXWvJ`V6_bVY@5Vt0R_Cf)Vx*F%d5sJJz3Ll$P*FdZEoXJ=;-OmGn5eHrk>jmZLU zP0xp^eEwWu3<3lvmdCi2|kM3trDB>oo4pGY5n>s-KF4fJ zM)3;SXml9;fKh01fp!s&f79rwu7L&|j=P@M*ViY*{Jpnd;-16uFGeX<3zXjR7xdKw z*Zq9BJ}wM~h{*EcX?!0pN`aL{RxW3NTx&U0?Csj`wKW@|#5%;T*^3wBVRwbi%*+T~ zqhHyFhK33V3oq>=8m1d6D=o5m22CP(S&wQ5{X*#n9)GQ`pPHOhBpIHf&rtn!n^vR| z6(1M(b7}YG%a=itbLWTCJZx&te}{^uxw&~wO^t`Udu}O*_LoPk2Wz9o#q4@J9WflW z#t$vF{Mq6a=wY624h~x&QrFklSDf89r|RCRPd#$q(!O;aYnkyV8ovoX<4**ublW!a z@$r$hep&N&9mKTsj?CIvnf1t*mrh?BP54$`SsH!h&vIn$Stek7!rx<2d%96W*q(vU z!B%cq>-Of=D~TkiBR-Z!9yna&&Cr|T-h;@Mt&HA^sw&;xD2TC5Z;IH7;Q&W^=36R|FWC zxag7A)fV_~WkhIiK$181HE**_cyq2$Q4+YbhIY#A36*IZk?VJz65KFDQOqcCRhco#I?RPov}Hk&cxe z?e53hOy`K)6+<}`cSBprw?vWdT(gr{0nXvtlM07mt5To0t0E?Im8tAoV7vL6G(3f)_SO^w2EY@u`^;cR5$ieSuD$`a4(-Q`03ciq6(UXNI zGu5U*)q<0E=9U@p@KRKwerDV|iW_-q9Ei)njI?T5ZD)EX!sLcbI8-xB;^lZhZuv&~ zdV9u0cDC{o3@Z{3D|$xEYN}JIo|u$fP&9r)7P~F-f<@GBUGsNXdxo=UcQ2*OqVlEg 
z=hDP3t-+Du0}}Ou7b2!L==Y+IL4UBiRpOL#*4J~>0=-(o_(n_((MfBWN4WPEvQj?|aWyWD1bcP=!wJox#4CJ(=PRf=e`aoyrmMQnCuT=u8O+(aryS<-AA2+}58Ii%$h zf%ijV2jiWOnQ9N@(A($FJmix1O+sme&eY#~e4;XS+Z*^^`{0#qK7ydl&UQ5SYS)Y1 zCVr7bwVOm6J?XHq|Bd%z&Q@19Ia;7cTveIPvb?swJIC!fJ?4QP$KiXfEQ3h6S)arM zN->Zd$@0b3uY=r(RR09cPf92Shht75=1=Hr#wXAoi+c!-8w)?)YPF2kk)&Ro7>_<# z6kGAp@lM-dn({qCn4j1sfYev$~aLixI)@WNS|V|R{gnPr7GT=f&6_<_yQ z5n1$Ud(b-BDdQ-|Fi$SI2GzHprJJ*&-jA(V!eVTM3O3+(2yqa4u{^!1iiNoVh~B57 zlQ{NFu|uwK&TYOspio}!N9|4V#}nE=itjc9$9{Gym0I$7CC7LM$!_kCkvUvf(d2Aw zw&`9oRO~k|)~}@FKHG=6w}}&o61K>%YbHt)WQcCrV+9LhKAGs$QwS6$FS3(HOP^{@ zzNyBCCP34B!(BgT#}Xn<-s}-$V=L!+@8C7TZ#Cu6%Ft_uk3_*g@>|7(@XtL9&Id*6 zpEz@gg5#-JRT6*e2B{Lorpn-$a9zUd$kHHPm}7%0cd37#WZ^-)UFoikko>4t_T2{$ zquO(866GQ#ZGRs%<;$A)&%EBdW}$>sV13SW!~f}OJX02?ok>a+WV>lbjBq+ke&0t; zA1|=Elx*4dF9vvu=KuXRK&hd5h*>rvF>#=f`=WM`qWqgM;mDl-@4P(m4FEPTxCa{; zf7$h&`Knl<@#-G9RH(hNB$u!iVB!pfN5S(ng2U4R`lP?r6>jo$Pv!kp8An0Yh&?qt znjN^X8A7n(MKj%e%t+2H_Z^w1Nvi9&n#*W|_@xL)u4?qv@*iz7BgwJZho&Cs6Hql{Kt!bh##`jsl(VV|3qc6nbbOsEngFugrD zVF%#`s9s?h%zLpb9)BnwMl0s-?Cw6+^mC@l9-sJm(t~zT$9__uf^tTiPfiUI*k;mc zH=R)+Hhb^O=J86>O0-eTHHQcjKE&D48+b52HrDFuYP-p|<;c5#ho3}a#5$HQ72{E* z?ms-Dz(oqu@lK@Vx#SfSP+Qf5MB`k`Pq~M{-nR$93Die`t-_-h4^OEmDGl6Lvcke{ zH5nx)CYF?x5D*Y}H%v#Y-#PiiaCy|l-&=byW^L%XEwxatrdp-1vzo&HB?6kBzF%SRSAKA#IAu^gjs+2?9hAjC36i13lvH=G6IwQ!l!xD1-@V zo#@cp)iqd+e|ziL(~wAdW;YMjv_7ijEAreeDeuo8AIGcl4RFd3n88!8f)Pjyb&coo z&hoMaz?o6XuwzU?k@vx{DkT%ekEPNbqGHijegghQC8;S5<)t@MPp_=b!9qP^5X5&uHT$;L9`xIBdK$)?X`CqwP=VxoQh@nW*; z)bUm`!1forsLQkI%X34i3IlnTTSaIM*S*uR^zHs@waZ|K5MXz#y>9!_eT2?eIm`Aw3i$4wp@wbt$*I&^9ZCrLIsE1r?^zF%XAN1zI486 zMm`?TZ4i$R>15o>-QM2r6~C-iTZ+lB=_(KS43}cJpuEOiURo+3An=2_X1C`_!_i#G zWR0u%ZWq7L<>|ER`q(~*yUO9R>!-ws#%>E3$y@7r8hZL36TZFL#IXN{Lw}-r%}Ovm zCI5!PyGv2+^Rvx?xVX4Tx+lX^Qz=S0$+lsJUVzM$eZlhORc@nM#qxvPswponW|URC zAF+(V7=?o928f%f@jGgYUCe? 
zBnoUU<9gdNT$=50zjjL}Z4YnvF7f2l65WUg48dK(&*JS{tvcq%o<|OksBNsQI@(1y(p5?gg7(US1v>(@RrvkR_gO@VWf?6VJ>QtH|2k{&I5D+x1UtnBnEg zNL^iB#k6-#OxY0vGqAF{nutOb{ZTizwGErfgV9>QmC9pz5np~Y0vF>iy0bRj&|on? zi%u?37xNo2^hxGzO(Ymi3H)H!|19m3It53RH&Mae_>r$hkn$KZ^S3{@eRk$$TXpUc ze`3z-H@^PFg3VJv<rQ!72oYtJhXd8@DWWb2vf#Fx{^tddelmw$uKm`%ntpL{ab$Kd zx1grcf;W8No+|p8Fr5-BGPj9Yu*4)Z`H5>duk`hAk6J}VMfC-1OaRAna@zg={TZ4A zOGPtbS7Tb;*v2N5SF7#Z<=k7mB}+X=DGv^PJ?@c_EH3l}+lNJz+_tmLGw z^njD(xjA=i)Ri_+q{1E=9==n4Jt1Ad1rXMB?#I^F)@R3lets(K?=1+P1YSUgG6rv+ z!|b=62a+U(l%(AwgVid24~iFUOcZ!?cJfiwl$Et;8@_k}3f}IqClJ9-_{Hvo-6kTk z-htntoVGMHMAf4y?-Tis|I)X_aFBT{dx|^E_+EFxf_!D63{g~2P^l;*V^bQe7KOaZ zAJmYQJ$ypTknBIL@Ys7`R%d4!aL&+TF^_{Sb9Thl^E^IAOzF!F(3qH*%!6#5HFf_R z=Wc*zEyt~P6TY)I_lTEQSEEdiF+&ASueMsl?$+kIP9XxTr)oh0IO7(M?Yx2Ix438~ zrP|uo=HcOS94J|qm2iCx`(#%G+4J;7>ZL<9O-j|-P^408US6J`-!&;!pyB|M+1bhK zX)sw8E3U*E70GPMN2B{$83j|6@Ax(`KJEbuzmzJ#PGB1a{As|l-Z#(A&c1F+HtBP% zuCGt&UDDRln({ui-4FfdivAX9$SdOrg1rOS5V1_-e6aF1weKnyUkcnb*xAD(ei zMllBwy?{Vo5jEsx6cAjniK$%XdF!*21GsRH_*Llf`GpRr(32;t->W1czR?K}i@h5% z%FTOI00!GXy4y4D%+6vI%EiUCf+7lRKIsSO&wh{&*|vLJdol#zT)9~7$8`1}$#<0M zMCsX6+?P@G1@a+ne-hG}Wz`xox!GuM5B%|j^-_mOX9R3ITUyH2`p8WqAmJpm-sgWu z01r({j`OoDwFgqFJI7G2C>iF#qI_lH?Re`yCK^1-LB{pIqq_mfkorv6=t`C+Ou52m z#aZvXHpW$MGW0l@T=@tS69UD5M^qUTlc9~g5{eCiPN+Lf_?%5i32{nB3z=oN-IA1c z0g}D;+SF7^m3&_ws??9=N`+L!XRBZN`i5mjXGezkE08Uxd7Ph0FUwxA z)d_o}EKM2SIP;oUx->K3pfz4>ZSx;A_&|oV_qNEev6`b|wc@I@Rp=HNO?j_qzE% z=FV@6Wj~XfER~OfzrS-Y*EQHXCh{Pn1s%d+c1o$#guGNy4`)Mm32U4=lc7?igrk*m z5cyJGWmX5ZEfNuOS-;;tXLK*LM`RkPMKa3~je6%$9{L|?mO`;MpY7y2UUB*yZ7pr> zp{CD<0@^vOm~LLCJ_~0%yB>GHL21FLcJ^Q+7Ot4yC7{YpOeA^Bx|CFcaOy4q0A(*( z#&)i&gNn*XN+|Ibj{o47;llDkd}Kuznw1$@o1XN$2Xq@M9H9uwA9dOF0Dwk8LP$BR3mz}NUZeHrk+~q7;Pkc#!OIwKB!lJ zoXz-QO#14}?xN(AB`zFwDOD{kEuhWJW|c20sxk;DCiX`fMZa!&NP-~8W^gEQSES#5 zLv8I?pa4yV_U|1p)LkB7NfqTMXHC&wIvf{76Ox*k3+$m)_SN#pH>+rG2d>memr&k9 z!HJfIB+khMykh%Y3oe$3IX+kX|y@fQYT{Rq`m^m*%u{cWig;wlG2Jgf8;%w^4RP{ 
zm2NaP`i7y^;{nFk_&GdG$uJw4trgT_IOJnWIYBZ48VN}_S9o)&HE*&RND3r%R`$R?fFCI^2 z){Oh~TPWd`20AAvkoWDy86t$`*L-GxMQ{>OCxKuY9v0RY1xA4V<<3Iq-VMy*sik4x zgy{SFYnI&fCq4jNXFh9fZT(_U2PRQnS;-!Ufb++f>e}j>vijjba^;28Sh7Gv#Egtb z6fvNxmM_cOi#@P;FlZIa0|eRD_F#-K@kI^pd1cd!IsjH(w8c(X0=+A8rVD=`Y<)gS z7>J8KQ`{ad2lugrYe)8bU>M#G{@THt=rZU85?4ncLg-9tatlQI)poS%h|p%Yf#%2L zcbUuD>YEUsF8|Pzy5(lN8^i`5FSxAr=!2#o+#pWaml(Y$POueL%-5e+CH8DY=emU6 z!o5wK>oPgH(dc*m-~cT#q-Q`~E@LV!p57lzjvNY3pWj7vwD7lnpM8X!9Sr0^_2T+J zoyrZA#=H)djm+FAZsB?QSEluJNge<&lOQ zK1qmO9)CKytLHiJI%A=a&NSus1QQbao%$3ojR)q^!QBg6l@5d|~c$*HhvNGFny1&2Q*VagFZDXUa zqa)&Rum<4J_t`Cg5~t;ijg48O;!!AZ7mUUuYVZ%J5Vyq7ww@a}`6f#^rQ|Q6bNJ<} z_B*HkK8ypURse@|I4SWzr1vfV_<_x%cv<_J%r_taM@se1ojXVJ!ReW5c{0q;qV!2m zyy7n93Vk|3q+vcw&bf|_jU}|3D1i$|=zH=IEpD`#TbK=}~oqFKSj+R&eRv zQuze)8A?Nvs%_0e>j3`+rA{e^Rp{~K2c0|hUZ+6O0(Bc8DSo;7U(7#NoU1{HgI9>3 zznS{U|58K;tfS@NuT1|m7*nOsO^gRlri^NISy|cC)D&n=g*i)0OM?;*D(Lj4p}u~p zntAm{^H7kt_eT_2qb#khGurkjmlLDummI6Wr@(e@Y-|)4^LmxAhAfX*y0khN8Qn+q zy1KZqkOm!2`-mrYQ*!AW;;jILxxAd3-_z6c7I5#qJr&E7_RN+e=3nJL4=b7slKOOX zbSi-^PM1#-=YBLB2*i2YLJ={s?jrhY7>R!Zks{I_SLomg;_@r<)X@;7L+n zU2P?jM0_QP%W+9^p?cL5Sx*KW_$;5=wnRy2RsOaMAs$OqqVv@PcYpjC@hcAML9S99X8*X=_kCf_cR=CVEMLCl) zGb!lgDsgF<;^{&t)Xragy+Z2o&~$Ekp1QrrQM_*(pf~mS$&;F|Utb+0Q19;H$r`3fCJGnb1FXzk~u*Q#^fUb9D zQ9c7t>VCw}(Gqh8_vNdPrlm2CPp)F(UeEK^uFA7UExgC`ApQn{=5$oNim6^<&r6Op zN~2*c^W`f0{SW|4slY=rVW{z%&m|;Z+4D%(^4)UKZYiz68to>CSNVX;$ z>~M-|wB>ywasB-aB*(bd&CZ)72bEBh2g~I+`!b#+b}XWr>y%v`itw-+G)cpB5&hct^Z>Pd(A8T3ucXMeK?DPTurgdaba{i8FYbsQ1Mc+{wr9xs@Lf}NR{ zhKJuycmO5FiypoBsBG(v8GO(k*HJndT;7v=s@rR;ECYo?yJD#_WJisPsp)Rk_L7Jh z_Vd&YTE(#4uo)1Ld|2(t9b4nP(my}Xd|n08jHsv_a_BTT+^3|@T@wsb(0EY-q1 z^RJE6AgJ?^R7M{V7hFQjT7O?53&YxuTR>!n-HAh?4v#%EGBU0h4^~Fs^48W@H>EGn(!N$! 
z%CPkI_F7llZ)u;Noa`;AtN9zeeDUH>FA*W(`)6tH>o@zy$7{;VrFCzPiHGvaIc!xZ z5V~TuBs=a)2cG~FNvovCH3(#9_ zTawGjHZPsdz3o}eE1Hzc`tBJk8=K|E<7;EbIVSDqL)NOJu1*$K5~aXO!~j$9@);W+7ZjE!BO{x2aC39h`K4w0 zFJh}n&XU;O@vYGEP=Jr`+ka<}&i2zlxnS=&I&!nO=b6Sw?bj}!Qsge?a4 z&TkW=qj?5i)Ae4Z3LYLcEn>#LmSf zMmVQ?X&1<1t@R{SYUC(|Y75(--6)CJwg|t#uKUWMO@8a}i?^ef*UP9#&|CNHx+kTY zJ9~E%&MK?+Fh-4Bs*c(x>8dvl2qVa0Fs}ssXw#~SibIgFgVRATTv$-haw;R|IVHqR zA*gQYtqOw%{6yorhG7ibD;#si9bebSPZlF)3unz@xiL$Eop=UvS!!#;L9HvAcedZ> z;rvo9Dyy#U(D)Ms>?f@{Q2HG47P96`1a2!0orVM7!g-EAiL{L}9cJ%gLN_^r@)$!$&} zTVJSmubf$_3RyjO?5+#a{8AQBQcwRSf1*S~#jXD;e_IkHWX5taWhjRbZ|$h_PT`cf zSYR*^3xLw)n#sNzC97>jYEcH%sQJb2M2d&{62yqyvXLsMrR84W-q52_+MaKZx+=2w zWFx~EW%i8i8BMN9ph2$zd@sH9IZqN@Sp~=-F>))Xu8$@MH=S`?PVs}-0)!Kl>S+UB zy6BJl+@h^0FE6(q;f7T^#d9jLzRZ(N)#QFz?RLFbq}WLxfnOWD`trptupZzLFVzD(rxT{ZPE?ST(V?SIR7_b%M0`MMr}AC5H@< z9wv&(j{8V%1LB49AF{$%%k*LSsC*xC|N$OJG@@+6IUPKV15+d@$F?XyYFd zIe=VB=Y&gV2;BEjbq=S({{T<`kl6qbiq>Gz{mkI8#w#0HsIa--_vwa;GSr#ivjVHU zZJC~9XU4_F1+XrujqId@7w|@J1S{19*hG{B4FfUH=day6(QV#W57=eYwx}h#AE%S7 zg8K>N){quv*>u#x=mF1e!t_#LK!AWNTv=>CnpqY=9a64i;CRCJf0|NFs!V7vlngBx z(Cxhjg)<-oBK+zz4mYQZyfqtixMMlU@grp38%0lVB3q0A4;J7_ap%2=6Y3CF>^gucQvLUstPL#|OTvc~JWq}rsulv0Eqcs{Z z=V;r1cQFmLLAe7>8GiGgWkW#JjY4%-3SWD@D6Hk?iXjV<1njA6Y7JPXa&mHRS@*zQ zW{WVWzKq4|PhOK_ZRIX3ETGQU%*9XokhkeX2gk?db(ziR%?AU$`qXEda2ZTOy|$Wg z1ByiH5gh<}B1T@i=GFYPwX-X)s2G8U2R)}GYduK!giZJNM^^q}hG;P0bm4&c34eoG zSB}K+uRgvMQ`sW)T>`@YB{)Q)t{W3qMVTLtg=wzuQg0NCFn{yr4S?YZO)+5lbK7p{ ze94aga>noKCJ9d5@BYQSq59z}1k6FJ({<8CTTy7;X8u9j@$M2Bmd=dds;a7LYoP)H z8qInjU9IW-D@Dlb#tTlQ0~=lU_1N5=!X?P7WdDd9TF{leJAKr;GLU1LyC%czxIR|a z97r@$t_9-ciPpAODp&)gZ0_)sj>hvx*<__YUnxE$_b~;cqpWE)2u4sQ$kT_=Jxb^}r3>Kp2_*}Kq8l265& z{cs^OX8+Ys>-^%%xY`C>va!`2i;LM|WpAUqyA{o>rxU8)uItImhp1;=)6k7y`rdGP zzPUr-mrgu>4HMd1#y|uA^`om3^>-+wE&P9R m6aOE+|LVK`Z_Ubim$=1@cOA0!i3hJfE-R%Z`Q_QGcmD&5qTQJQ literal 0 HcmV?d00001 diff --git a/ComputeGraph/examples/example2/AppNodes.h b/ComputeGraph/examples/example2/AppNodes.h index d078ee1d..9841f3df 100644 --- 
a/ComputeGraph/examples/example2/AppNodes.h +++ b/ComputeGraph/examples/example2/AppNodes.h @@ -31,6 +31,7 @@ #include #include "Unzip.h" +#include "SlidingBuffer.h" template class TFLite: public GenericSink diff --git a/ComputeGraph/examples/example3/AppNodes.h b/ComputeGraph/examples/example3/AppNodes.h index 910d296e..6136701b 100644 --- a/ComputeGraph/examples/example3/AppNodes.h +++ b/ComputeGraph/examples/example3/AppNodes.h @@ -42,6 +42,8 @@ using namespace std; #include "ToComplex.h" #include "ToReal.h" +#include "SlidingBuffer.h" +#include "OverlapAndAdd.h" diff --git a/ComputeGraph/examples/example6/AppNodes.h b/ComputeGraph/examples/example6/AppNodes.h index 2179291e..e7c58343 100644 --- a/ComputeGraph/examples/example6/AppNodes.h +++ b/ComputeGraph/examples/example6/AppNodes.h @@ -38,6 +38,7 @@ using namespace std; #include "host/FileSink.h" #include "host/FileSource.h" #include "MFCC.h" +#include "SlidingBuffer.h" diff --git a/Include/arm_math.h b/Include/arm_math.h index b32c4d14..7d696e67 100644 --- a/Include/arm_math.h +++ b/Include/arm_math.h @@ -29,7 +29,7 @@ * \section intro Introduction * * This user manual describes the CMSIS DSP software library, - * a suite of common signal processing functions for use on Cortex-M and Cortex-A processor + * a suite of common compute processing functions for use on Cortex-M and Cortex-A processor * based devices. * * The library is divided into a number of functions each covering a specific category: @@ -49,7 +49,7 @@ * - \ref groupQuaternionMath "Quaternion functions" * * The library has generally separate functions for operating on 8-bit integers, 16-bit integers, - * 32-bit integer and 32-bit floating-point values. + * 32-bit integer and 32-bit floating-point values and 64-bit floating-point values. * * The library is providing vectorized versions of most algorthms for Helium * and of most f32 algorithms for Neon. @@ -59,6 +59,10 @@ * of a buffer. 
You don't have to modify your buffers but just ensure that the * end of buffer + padding is not outside of a memory region. * + * CMSIS-DSP pack is containing an optional project : The CMSIS-DSP + * Compute Graph. The documentation for this project is available + * on the CMSIS-DSP github repository. + * * \section using Using the Library * * The library is released in source form. It is strongly advised to compile the library using -Ofast to diff --git a/cmsisdsp/cg/static/nodes/StereoToMono.py b/cmsisdsp/cg/static/nodes/InterleavedStereoToMono.py similarity index 90% rename from cmsisdsp/cg/static/nodes/StereoToMono.py rename to cmsisdsp/cg/static/nodes/InterleavedStereoToMono.py index 1fbd5216..20a5bae1 100644 --- a/cmsisdsp/cg/static/nodes/StereoToMono.py +++ b/cmsisdsp/cg/static/nodes/InterleavedStereoToMono.py @@ -1,7 +1,7 @@ ########################################### # Project: CMSIS DSP Library -# Title: StereoToMono.py -# Description: Stereo to mono in Q15 +# Title: InterleavedStereoToMono.py +# Description: Interleaved Stereo to mono in Q15 # # $Date: 06 August 2021 # $Revision: V1.10.0 @@ -29,7 +29,7 @@ from .simu import * import numpy as np import cmsisdsp as dsp -class StereoToMono(GenericNode): +class InterleavedStereoToMono(GenericNode): def __init__(self,inputSize,outputSize,fifoin,fifoout): GenericNode.__init__(self,inputSize,outputSize,fifoin,fifoout) if fifoin.type == np.dtype(np.float32): diff --git a/cmsisdsp/cg/static/scheduler/standard.py b/cmsisdsp/cg/static/scheduler/standard.py index 21d66bc1..e5894256 100644 --- a/cmsisdsp/cg/static/scheduler/standard.py +++ b/cmsisdsp/cg/static/scheduler/standard.py @@ -114,7 +114,7 @@ class NullSink(GenericSink): def typeName(self): return "NullSink" -class StereoToMono(GenericNode): +class InterleavedStereoToMono(GenericNode): def __init__(self,name,theType,outLength): GenericNode.__init__(self,name) self.addInput("i",theType,2*outLength) @@ -122,7 +122,7 @@ class StereoToMono(GenericNode): @property 
def typeName(self): - return "StereoToMono" + return "InterleavedStereoToMono" class MFCC(GenericNode):