{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T22:56:00Z","timestamp":1776984960096,"version":"3.51.4"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2016,3,31]],"date-time":"2016-03-31T00:00:00Z","timestamp":1459382400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["EURASIP J. Adv. Signal Process."],"published-print":{"date-parts":[[2016,12]]},"DOI":"10.1186\/s13634-016-0336-0","type":"journal-article","created":{"date-parts":[[2016,3,31]],"date-time":"2016-03-31T01:40:25Z","timestamp":1459388425000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Instruction scheduling heuristic for an efficient FFT in VLIW processors with balanced resource usage"],"prefix":"10.1186","volume":"2016","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3032-6792","authenticated-orcid":false,"given":"Mounir","family":"Bahtat","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Said","family":"Belkouch","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Philippe","family":"Elleaume","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Philippe","family":"Le Gall","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,3,31]]},"reference":[{"key":"336_CR1","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1090\/S0025-5718-1965-0178586-1","volume":"19","author":"JW Cooley","year":"1965","unstructured":"JW Cooley, JW Tukey, An algorithm for the machine calculation of complex Fourier series. Math. Comput. 19, 297\u2013301 (1965)","journal-title":"Math. Comput."},{"issue":"2","key":"336_CR2","doi-asserted-by":"publisher","first-page":"138","DOI":"10.1109\/TAU.1969.1162043","volume":"17","author":"GD Bergland","year":"1969","unstructured":"GD Bergland, A radix-eight fast-Fourier transform subroutine for real-valued series. IEEE Trans. On Electroacoust. 17(2), 138\u2013144 (1969)","journal-title":"IEEE Trans. On Electroacoust."},{"issue":"2","key":"336_CR3","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1109\/TAU.1969.1162042","volume":"1","author":"RC Singleton","year":"1969","unstructured":"RC Singleton, An algorithm for computing the mixed radix fast Fourier transform. IEEE Trans. Audio Electroacoust. 1(2), 93\u2013103 (1969)","journal-title":"IEEE Trans. Audio Electroacoust."},{"key":"336_CR4","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1049\/el:19840012","volume":"20","author":"P Duhamel","year":"1984","unstructured":"P Duhamel, H Hollmann, Split radix FFT algorithm. Electronics Letters 20, 14\u201316 (1984)","journal-title":"Electronics Letters"},{"issue":"5","key":"336_CR5","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1109\/97.917698","volume":"8","author":"D Takahashi","year":"2001","unstructured":"D Takahashi, An extended split-radix FFT algorithm. IEEE Signal Processing Letters 8(5), 145\u2013147 (2001)","journal-title":"IEEE Signal Processing Letters"},{"key":"336_CR6","doi-asserted-by":"publisher","first-page":"614","DOI":"10.1109\/82.466641","volume":"42","author":"AR Varkonyi-Koczy","year":"1995","unstructured":"AR Varkonyi-Koczy, A recursive fast Fourier transform algorithm. IEEE Trans. on Circuits and Systems, II 42, 614\u2013616 (1995)","journal-title":"IEEE Trans. on Circuits and Systems, II"},{"key":"336_CR7","first-page":"453","volume":"3","author":"A Saidi","year":"1994","unstructured":"A Saidi, Decimation-in-time-frequency FFT algorithm. Proc. ICAPSS 3, 453\u2013456 (1994)","journal-title":"Proc. ICAPSS"},{"issue":"3","key":"336_CR8","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1109\/4.748190","volume":"34","author":"BM Baas","year":"1999","unstructured":"BM Baas, A low-power, high-performance, 1024-point FFT processor. IEEE J. Solid-State Circuits 34(3), 380\u2013387 (1999)","journal-title":"IEEE J. Solid-State Circuits"},{"issue":"1","key":"336_CR9","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1109\/TPDS.2010.125","volume":"22","author":"R Weber","year":"2011","unstructured":"R Weber et al., Comparing hardware accelerators in scientific applications: a case study. IEEE Trans. Parallel Distrib. Syst. 22(1), 58\u201368 (2011). doi: 10.1109\/TPDS.2010.125","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"1","key":"336_CR10","first-page":"436","volume":"21","author":"T Fryza","year":"2012","unstructured":"T Fryza, J Svobodova, F Adamec, R Marsalek, J Prokopec, Overview of parallel platforms for common high performance computing. Radioengineering 21(1), 436\u2013444 (2012)","journal-title":"Radioengineering"},{"key":"336_CR11","doi-asserted-by":"publisher","unstructured":"Jia-Jhe Li, Chi-Bang Kuan, Tung-Yu Wu, and Jenq Kuen Lee. Enabling an OpenCL compiler for embedded multicore DSP systems. In Proceedings of the 2012 41st International Conference on Parallel Processing Workshops (ICPPW '12). (IEEE Computer Society, Washington, DC, USA, 2012), p. 545-552","DOI":"10.1109\/ICPPW.2012.74"},{"key":"336_CR12","unstructured":"Francisco D. Igual, Guillermo Botella, Carlos Garc\u00eda, Manuel Prieto, Francisco Tirado, Robust motion estimation on a low-power multi-core DSP. EURASIP Journal on Advances in Signal Processing. 99, 1-15 (2013)"},{"key":"336_CR13","doi-asserted-by":"publisher","unstructured":"T. Fryza and R. Mego, Low level source code optimizing for single\/multi\/core digital signal processors, Radioelektronika (RADIOELEKTRONIKA), 2013 23rd International Conference, Pardubice, 2013, pp. 288-291. doi: 10.1109\/RadioElek.2013.6530933","DOI":"10.1109\/RadioElek.2013.6530933"},{"key":"336_CR14","unstructured":"JA Fisher, P Faraboschi, C Young, Embedded computing: A VLIW approach to architecture, compilers, and tools. (Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 2005), ISBN: 9780080477541."},{"issue":"5","key":"336_CR15","first-page":"27","volume":"4","author":"V Z\u02c7Zivojnovi\u2019c","year":"1995","unstructured":"V Z\u02c7Zivojnovi\u2019c, Compilers for digital signal processors. DSP & Multimedia Technology Magazine 4(5), 27\u201345 (1995)","journal-title":"DSP & Multimedia Technology Magazine"},{"key":"336_CR16","unstructured":"J. P. Grossman, Compiler and architectural techniques for improving the effectiveness of VLIW compilation.\u00a0[Online]. Available: http:\/\/www.ai.mit.edu\/projects\/aries\/Documents\/vliw.pdf [24-Mars-2016]"},{"issue":"7","key":"336_CR17","doi-asserted-by":"publisher","first-page":"478","DOI":"10.1109\/TC.1981.1675827","volume":"30","author":"JA Fisher","year":"1981","unstructured":"JA Fisher, Trace scheduling: a technique for global microcode compaction. IEEE Trans. Comput. 30(7), 478\u2013490 (1981)","journal-title":"IEEE Trans. Comput."},{"key":"336_CR18","first-page":"318","volume-title":"Software pipelining: an effective scheduling technique for VLIW machines","author":"L Monica","year":"1988","unstructured":"L Monica, Software pipelining: an effective scheduling technique for VLIW machines (Proc. SIGPLAN \u201988 Conference on Programming Language Design and Implementation, Atlanta, 1988), pp. 318\u2013328"},{"key":"336_CR19","series-title":"Proc. 27th Annual International Symposium on Microarchitecture","first-page":"63","volume-title":"Iterative modulo scheduling: an algorithm for software pipelining loops","author":"B Ramakrishna Rau","year":"1994","unstructured":"B Ramakrishna Rau, Iterative modulo scheduling: an algorithm for software pipelining loops. Proc. 27th Annual International Symposium on Microarchitecture, 1994, pp. 63\u201374"},{"key":"336_CR20","doi-asserted-by":"publisher","unstructured":"M. Bahtat, S. Belkouch, P. Elleaume, P. Le Gall, Fast enumeration-based modulo scheduling heuristic for VLIW architectures,\u00a0in 26th International Conference on Microelectronics (ICM), 2014, pp. 116-119, 2014. doi: 10.1109\/ICM.2014.7071820","DOI":"10.1109\/ICM.2014.7071820"},{"key":"336_CR21","doi-asserted-by":"publisher","first-page":"2338","DOI":"10.1109\/TSP.2007.892722","volume":"55","author":"Y Wang","year":"2007","unstructured":"Y Wang, Y Tang, Y Jiang, JG Chung, SS Song, MS Lim, Novel memory reference reduction methods for FFT implementation on DSP processors. IEEE Trans. Signal Process 55, 2338\u20132349 (2007). doi: 10.1109\/TSP.2007.892722","journal-title":"IEEE Trans. Signal Process"},{"key":"336_CR22","first-page":"70","volume-title":"Proc. 16th Int. Symp. Parallel Distrib. Process","author":"Y Jiang","year":"2002","unstructured":"Y Jiang, T Zhou, Y Tang, Y Wang, Twiddle-factor-based FFT algorithm with reduced memory access, in Proc. 16th Int. Symp. Parallel Distrib. Process (IEEE Computer Soc, Washington, 2002), p. 70"},{"key":"336_CR23","doi-asserted-by":"publisher","unstructured":"K.J. Bowers, D.E. Shaw Res, New York, NY, USA; R.A. Lippert, R.O. Dror, D.E. Shaw, Improved twiddle access for fast Fourier transforms. IEEE Trans. Signal Process. 58(3), 1122\u20131130 (2010)","DOI":"10.1109\/TSP.2009.2035984"},{"issue":"12","key":"336_CR24","doi-asserted-by":"publisher","first-page":"6217","DOI":"10.1109\/TSP.2011.2168525","volume":"59","author":"VI Kelefouras","year":"2011","unstructured":"VI Kelefouras, G Athanasiou, N Alachiotis, HE Michail, A Kritikakou, CE Goutis, A methodology for speeding up fast Fourier transform focusing on memory architecture utilization. IEEE Trans. Signal Process 59(12), 6217\u20136226 (2011)","journal-title":"IEEE Trans. Signal Process"},{"key":"336_CR25","volume-title":"Proc. Int. Conf. Acoust., Speech, Signal Process. (ICASSP)","author":"M Frigo","year":"1998","unstructured":"M Frigo, SG Johnson, The fastest Fourier transform in the west, in Proc. Int. Conf. Acoust., Speech, Signal Process. (ICASSP), 1998"},{"key":"336_CR26","doi-asserted-by":"publisher","first-page":"642","DOI":"10.1145\/989393.989457","volume":"39","author":"M Frigo","year":"2004","unstructured":"M Frigo, A fast Fourier transform compiler. SIGLAN Not. 39, 642\u2013655 (2004)","journal-title":"SIGLAN Not."},{"issue":"1","key":"336_CR27","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1109\/TSP.2006.882087","volume":"55","author":"S Johnson","year":"2006","unstructured":"S Johnson, M Frigo, A modified split-radix FFT with fewer arithmetic operations. IEEE Trans. Signal Process 55(1), 111\u2013119 (2006)","journal-title":"IEEE Trans. Signal Process"},{"key":"336_CR28","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1007\/3-540-45545-0_17","volume-title":"Computational Science\u2014ICCS 2001","author":"D Mirkovic","year":"2001","unstructured":"D Mirkovic, L Johnsson, Automatic performance tuning in the UHFFT library, in Computational Science\u2014ICCS 2001 (Springer, New York, 2001), pp. 71\u201380"},{"issue":"19","key":"336_CR29","doi-asserted-by":"publisher","first-page":"4707","DOI":"10.1109\/TSP.2013.2273199","volume":"61","author":"AM Blake","year":"2013","unstructured":"AM Blake, IH Witten, MJ Cree, The fastest Fourier transform in the south. IEEE Trans. Signal Process 61(19), 4707\u20134716 (2013)","journal-title":"IEEE Trans. Signal Process"},{"key":"336_CR30","doi-asserted-by":"publisher","unstructured":"M. Bahtat, S. Belkouch, P. Elleaume, P. Le Gall, Efficient implementation of a complete multi-beam radar coherent-processing on a telecom SoC, in 2014 International Radar Conference (Radar), pp. 1-6, 2014. doi: 10.1109\/RADAR.2014.7060412","DOI":"10.1109\/RADAR.2014.7060412"},{"key":"336_CR31","first-page":"766","volume-title":"Proc. IEEE Parallel Processing Symp","author":"S He","year":"1996","unstructured":"S He, M Torkelson, A new approach to pipeline FFT processor, in Proc. IEEE Parallel Processing Symp, 1996, pp. 766\u2013770"},{"key":"336_CR32","doi-asserted-by":"publisher","unstructured":"J.M. Codina, J. Llosa, A. Gonz\u00e1lez, A comparative study of modulo scheduling techniques, Proceedings of the 16th international conference on Supercomputing ICS 02(2002), 13(1), 97. ACM Press","DOI":"10.1145\/514191.514208"},{"key":"336_CR33","doi-asserted-by":"crossref","unstructured":"RA Huff, Lifetime-sensitive modulo scheduling, In Proc. of the ACM SIGPLAN '93 Conf. on Programming Language Design and Implementation.\u00a0258-267(1993)","DOI":"10.1145\/173262.155115"},{"key":"336_CR34","doi-asserted-by":"publisher","unstructured":"AK Dani, VJ Ramanan, R Govindarajan, Register-sensitive software pipelining. Parallel Processing Symposium, 1998. (IPPS\/SPDP, Orlando, FL, 1998), p. 194-198","DOI":"10.1109\/IPPS.1998.669910"},{"key":"336_CR35","doi-asserted-by":"crossref","unstructured":"J. Llosa, A. Gonz\u00e1lez, E. Ayguad\u00e9, M. Valero, Swing modulo scheduling: a lifetime-sensitive approach, PACT \u203296 Proceedings of the 1996 Conference on Parallel Architectures and Compilation Techniques.\u00a0(Boston, MA, 1996), p. 80-86","DOI":"10.1109\/PACT.1996.554030"},{"issue":"3","key":"336_CR36","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1109\/12.910814","volume":"50","author":"J Llosa","year":"2002","unstructured":"J Llosa, E Ayguade, A Gonzalez, M Valero, J Eckhardt, Lifetime-sensitive modulo scheduling in a production environment. IEEE Trans. Comput. 50(3), 234\u2013249 (2002)","journal-title":"IEEE Trans. Comput."},{"issue":"5","key":"336_CR37","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1109\/TPDS.2004.1278099","volume":"15","author":"J Zalamea","year":"2004","unstructured":"J Zalamea, J Llosa, E Ayguade, M Valero, Register constrained modulo scheduling. IEEE Trans. Parallel Distrib. Syst. 15(5), 417\u2013430 (2004)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"336_CR38","unstructured":"TMS320C6678, Multicore fixed and floating-point digital signal processor, Data Manual, Texas Instruments.\u00a0SPRS691E. March 2014. [Online]. Available: www.ti.com\/lit\/gpn\/tms320c6678 [25-Mars-2016]"},{"issue":"1","key":"336_CR39","first-page":"1","volume":"4","author":"M Tasche","year":"2012","unstructured":"M Tasche, H Zeuner, Improved roundoff error analysis for precomputed twiddle factors. J. Comput. Anal. Appl. 4(1), 1\u201318 (2012)","journal-title":"J. Comput. Anal. Appl."},{"key":"336_CR40","unstructured":"JJ Alter, JO Coleman, Radar digital signal processing, Chapter 25 in Merrill I. Skolnik, Radar Handbook, Third Edition, (McGraw-Hill, 2008)"},{"key":"336_CR41","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1186\/1687-6180-2014-161","volume":"2014","author":"A Klilou","year":"2014","unstructured":"A Klilou, S Belkouch, P Elleaume, P Le Gall, F Bourzeix, MM Hassani, Real-time parallel implementation of pulse-Doppler radar signal processing chain on a massively parallel machine based on multi-core DSP and serial RapidIO interconnect. EURASIP Journal on Advances in Signal Processing 2014, 161 (2014)","journal-title":"EURASIP Journal on Advances in Signal Processing"},{"key":"336_CR42","unstructured":"Texas Instruments. FFT library for C66X floating point devices, C66X FFTLIB, version 2.0. [Online]. Available: http:\/\/www.ti.com\/tool\/FFTLIB [25-Mars-2016]"},{"key":"336_CR43","doi-asserted-by":"publisher","unstructured":"Sang Yoon Park; Nam Ik Cho; Sang Uk Lee; Kichul Kim; Jisung Oh, Design of 2K\/4K\/8K-point FFT processor based on CORDIC algorithm in OFDM receiver, Communications, Computers and signal Processing, 2001. PACRIM. 2001 IEEE Pacific Rim Conference on, vol. 2, no., pp. 457,460 vol. 2, 2001. doi: 10.1109\/PACRIM.2001.953668","DOI":"10.1109\/PACRIM.2001.953668"},{"issue":"2007","key":"336_CR44","first-page":"65","volume":"4599","author":"T Pitk\u00e4nen","year":"2007","unstructured":"T Pitk\u00e4nen, T Partanen, J Takala, Low-power twiddle factor unit for FFT computation. Embedded Computer Systems: Architectures, Modeling, and Simulation Lecture Notes in Computer Science 4599(2007), 65\u201374 (2007)","journal-title":"Embedded Computer Systems: Architectures, Modeling, and Simulation Lecture Notes in Computer Science"},{"key":"336_CR45","volume-title":"An efficient FFT twiddle factor generator","author":"JC Chi","year":"2004","unstructured":"JC Chi, SG Chen, An efficient FFT twiddle factor generator (Proc. European Signal Process. Conf, Vienna, 2004)"}],"container-title":["EURASIP Journal on Advances in Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13634-016-0336-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13634-016-0336-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13634-016-0336-0","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13634-016-0336-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T01:39:06Z","timestamp":1748828346000},"score":1,"resource":{"primary":{"URL":"https:\/\/asp-eurasipjournals.springeropen.com\/articles\/10.1186\/s13634-016-0336-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,3,31]]},"references-count":45,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2016,12]]}},"alternative-id":["336"],"URL":"https:\/\/doi.org\/10.1186\/s13634-016-0336-0","relation":{},"ISSN":["1687-6180"],"issn-type":[{"value":"1687-6180","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,3,31]]},"article-number":"38"}}