{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T18:06:12Z","timestamp":1764785172260,"version":"3.37.3"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2023,8,28]],"date-time":"2023-08-28T00:00:00Z","timestamp":1693180800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,28]],"date-time":"2023-08-28T00:00:00Z","timestamp":1693180800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Artificial and Natural Intelligence Toulouse Institute"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Real-Time Syst"],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1007\/s11241-023-09407-z","type":"journal-article","created":{"date-parts":[[2023,8,28]],"date-time":"2023-08-28T10:03:23Z","timestamp":1693217003000},"page":"408-437","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Extending a predictable machine learning framework with efficient gemm-based convolution routines"],"prefix":"10.1007","volume":"59","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2603-7947","authenticated-orcid":false,"given":"Iryna","family":"De Albuquerque Silva","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1411-1030","authenticated-orcid":false,"given":"Thomas","family":"Carle","sequence":"additional","affiliation":[]},{"given":"Adrien","family":"Gauffriau","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7265-1839","authenticated-orcid":false,"given":"Claire","family":"Pagetti","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,28]]},"reference":[{"key":"9407_CR1","unstructured":"Abadi M, Agarwal A, Barham P, et\u00a0al (2015) TensorFlow: large-scale machine learning on heterogeneous systems. URL https:\/\/www.tensorflow.org\/, software available from tensorflow.org"},{"key":"9407_CR2","unstructured":"Alves E, Bhatt D, Hall B, et\u00a0al (2018) Considerations in assuring safety of increasingly autonomous systems. NASA"},{"key":"9407_CR3","doi-asserted-by":"publisher","unstructured":"Amiri H, Shahbahrami A (2017) High performance implementation of 2D convolution using Intel\u2019s advanced vector extensions. In: 2017 Artificial intelligence and signal processing conference (AISP), pp 25\u201330, https:\/\/doi.org\/10.1109\/AISP.2017.8324097","DOI":"10.1109\/AISP.2017.8324097"},{"key":"9407_CR4","doi-asserted-by":"publisher","unstructured":"Anderson A, Vasudevan A, Keane C, et\u00a0al (2017) Low-memory GEMM-based convolution algorithms for deep neural networks. https:\/\/doi.org\/10.48550\/arXiv.1709.03395, arXiv:1709.03395 [cs]","DOI":"10.48550\/arXiv.1709.03395"},{"key":"9407_CR5","unstructured":"ApacheTVM (2021) microTVM: TVM on bare-metal. URL https:\/\/tvm.apache.org\/docs\/topic\/microtvm\/index.html"},{"key":"9407_CR6","doi-asserted-by":"crossref","unstructured":"Ballabriga C, Cass\u00e9 H, Rochange C, et\u00a0al (2010) OTAWA: an open toolbox for adaptive WCET analysis (regular paper). In: IFIP Workshop on software technologies for future embedded and ubiquitous systems (SEUS)","DOI":"10.1007\/978-3-642-16256-5_6"},{"key":"9407_CR7","doi-asserted-by":"publisher","unstructured":"Bhattacharyya S, Cofer D, Musliner D, et\u00a0al (2015) Certification considerations for adaptive systems. 2015 International conference on unmanned aircraft systems, ICUAS 2015 pp 270\u2013279. https:\/\/doi.org\/10.1109\/ICUAS.2015.7152300","DOI":"10.1109\/ICUAS.2015.7152300"},{"key":"9407_CR8","unstructured":"Chellapilla K, Puri S, Simard P (2006) High performance convolutional neural networks for document processing. In: Lorette G (ed) Tenth international workshop on frontiers in handwriting recognition, Universit\u00e9 de Rennes 1. Suvisoft, La Baule (France), URL https:\/\/hal.inria.fr\/inria-00112631, http:\/\/www.suvisoft.com"},{"key":"9407_CR9","doi-asserted-by":"crossref","unstructured":"Chen T, Moreau T, Jiang Z, et\u00a0al (2018a) TVM: end-to-end optimization stack for deep learning. CoRR arXiv:abs\/1802.04799","DOI":"10.1145\/3149166.3149174"},{"key":"9407_CR10","unstructured":"Chen T, Zheng L, Yan E, et\u00a0al (2018b) Learning to optimize tensor programs. In: Proceedings of the 32nd international conference on neural information processing systems. Curran Associates Inc., Red Hook, NY, USA, NIPS\u201918, p 3393-3404"},{"key":"9407_CR11","unstructured":"Chetlur S, Woolley C, Vandermersch P, et\u00a0al (2014) cuDNN: efficient primitives for deep learning. CoRR arXiv:abs\/1410.0759"},{"key":"9407_CR12","unstructured":"Chichin S, Portes D, Blunder M, et\u00a0al (2020) Capability to embed deep neural networks: study on CPU processor in avionics context. In: 10th European congress embedded real time systems (ERTS 2020)"},{"key":"9407_CR13","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1007\/978-3-319-11179-7_36","volume-title":"Artificial neural networks and machine learning - ICANN 2014","author":"J Cong","year":"2014","unstructured":"Cong J, Xiao B (2014) Minimizing computation in convolutional neural networks. In: Wermter S, Weber C, Duch W et al (eds) Artificial neural networks and machine learning - ICANN 2014. Springer, Cham, pp 281\u2013290"},{"issue":"104","key":"9407_CR14","first-page":"182","volume":"100","author":"R Conlin","year":"2021","unstructured":"Conlin R, Erickson K, Abbate J et al (2021) Keras2c: a library for converting keras neural networks to real-time compatible C. Eng Appl Artif Intell 100(104):182","journal-title":"Eng Appl Artif Intell"},{"key":"9407_CR15","unstructured":"developers OR (2021) Onnx runtime. https:\/\/onnxruntime.ai\/"},{"issue":"1","key":"9407_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/77626.79170","volume":"16","author":"JJ Dongarra","year":"1990","unstructured":"Dongarra JJ, Du Croz J, Hammarling S et al (1990) A set of level 3 basic linear algebra subprograms. ACM Trans Math Softw 16(1):1\u201317. https:\/\/doi.org\/10.1145\/77626.79170","journal-title":"ACM Trans Math Softw"},{"key":"9407_CR17","unstructured":"Dukhan M (2019) The indirect convolution algorithm. CoRR arXiv:abs\/1907.02129"},{"key":"9407_CR18","unstructured":"EUROCAE WG-114\/SAE joint group (2021) Certification\/approval of aeronautical systems based on AI. On going standardization"},{"key":"9407_CR19","unstructured":"Gholami A, Kim S, Dong Z, et\u00a0al (2021) A survey of quantization methods for efficient neural network inference. CoRR arXiv:abs\/2103.13630"},{"key":"9407_CR20","unstructured":"Gong Y, Liu L, Yang M, et\u00a0al (2014) Compressing deep convolutional networks using vector quantization. CoRR arXiv:abs\/1412.6115"},{"issue":"3","key":"9407_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1356052.1356053","volume":"34","author":"K Goto","year":"2008","unstructured":"Goto K, van de Geijn RA (2008) Anatomy of high-performance matrix multiplication. ACM Trans Math Softw 34(3):1\u201325. https:\/\/doi.org\/10.1145\/1356052.1356053","journal-title":"ACM Trans Math Softw"},{"key":"9407_CR22","unstructured":"Han S, Mao H, Dally WJ (2016) Deep compression: Compressing deep neural network with pruning, trained quantization and huffman coding. In: Bengio Y, LeCun Y (eds) 4th International conference on learning representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, conference track proceedings, arXiv:org\/abs\/1510.00149"},{"key":"9407_CR23","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1016\/j.eswa.2019.03.029","volume":"129","author":"E Hoseinzade","year":"2019","unstructured":"Hoseinzade E, Haratizadeh S (2019) CNNpred: CNN-based stock market prediction using a diverse set of variables. Expert Syst Appl 129:273\u2013285","journal-title":"Expert Syst Appl"},{"key":"9407_CR24","doi-asserted-by":"publisher","unstructured":"IEEE (2019) IEEE Standard for Floating-Point Arithmetic. IEEE Std 754-2019 (Revision of IEEE 754-2008) pp 1\u201384. https:\/\/doi.org\/10.1109\/IEEESTD.2019.8766229","DOI":"10.1109\/IEEESTD.2019.8766229"},{"key":"9407_CR25","doi-asserted-by":"publisher","unstructured":"Jia Z, Padon O, Thomas J, et\u00a0al (2019) TASO. In: Proceedings of the 27th ACM symposium on operating systems principles. ACM, https:\/\/doi.org\/10.1145\/3341301.3359630","DOI":"10.1145\/3341301.3359630"},{"key":"9407_CR26","unstructured":"Kalray (2021) MPPA\u00ae Coolidge$$^{{\\rm TM}}$$ Processor - white paper. https:\/\/www.kalrayinc.com\/documentation\/"},{"key":"9407_CR27","first-page":"95","volume-title":"Encyclopedia of parallel computing","author":"RK Karmani","year":"2011","unstructured":"Karmani RK, Agha G, Squillante MS et al (2011) ATLAS (Automatically tuned linear algebra software). Encyclopedia of parallel computing. Springer, New York, pp 95\u2013101"},{"key":"9407_CR28","unstructured":"Krizhevsky A (2009) Learning multiple layers of features from tiny images. Tech. Rep.\u00a00, University of Toronto"},{"key":"9407_CR29","doi-asserted-by":"crossref","unstructured":"Lattner C, Amini M, Bondhugula U, et\u00a0al (2021) MLIR: scaling compiler infrastructure for domain specific computation. In: Lee JW, Soffa ML, Zaks A (eds) International symposium on code generation and optimization, (CGO), pp 2\u201314","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"9407_CR30","doi-asserted-by":"publisher","unstructured":"Lavin A, Gray S (2016) Fast algorithms for convolutional neural networks. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 4013\u20134021, https:\/\/doi.org\/10.1109\/CVPR.2016.435","DOI":"10.1109\/CVPR.2016.435"},{"issue":"4","key":"9407_CR31","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun Y, Boser BE, Denker JS et al (1989) Backpropagation applied to handwritten zip code recognition. Neural Comput 1(4):541\u2013551","journal-title":"Neural Comput"},{"key":"9407_CR32","doi-asserted-by":"crossref","unstructured":"Li C, Yang Y, Feng M, et\u00a0al (2016) Optimizing memory efficiency for deep convolutional neural networks on GPUs. In: Proceedings of the international conference for high performance computing, networking, storage and analysis, SC 2016","DOI":"10.1109\/SC.2016.53"},{"key":"9407_CR33","doi-asserted-by":"publisher","unstructured":"Lin S, Liu N, Nazemi M, et\u00a0al (2018) FFT-based deep learning deployment in embedded systems. In: 2018 Design, automation and test in Europe conference and exhibition (DATE, pp 1045\u20131050, https:\/\/doi.org\/10.23919\/DATE.2018.8342166","DOI":"10.23919\/DATE.2018.8342166"},{"key":"9407_CR34","doi-asserted-by":"publisher","unstructured":"Liu Y, Wang Y, Yu R, et\u00a0al (2018) Optimizing CNN Model Inference on CPUs. https:\/\/doi.org\/10.48550\/ARXIV.1809.02697, arXiv:org\/abs\/1809.02697","DOI":"10.48550\/ARXIV.1809.02697"},{"issue":"2","key":"9407_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2925987","volume":"43","author":"TM Low","year":"2016","unstructured":"Low TM, Igual FD, Smith TM et al (2016) Analytical modeling is enough for high-performance BLIS. ACM Trans Math Softw 43(2):1\u201318. https:\/\/doi.org\/10.1145\/2925987","journal-title":"ACM Trans Math Softw"},{"key":"9407_CR36","unstructured":"Mathieu M, Henaff M, LeCun Y (2014) Fast training of convolutional networks through FFTS: International conference on learning representations (ICLR2014), cbls, april 2014. 2nd International conference on learning representations, ICLR 2014 ; Conference date: 14-04-2014 through 16-04-2014"},{"key":"9407_CR37","unstructured":"NVIDIA (2021) Tensorrt documentation"},{"key":"9407_CR38","doi-asserted-by":"publisher","unstructured":"Park H, Kim D, Ahn J, et\u00a0al (2016) Zero and data reuse-aware fast convolution for deep neural networks on GPU. In: Proceedings of the eleventh IEEE\/ACM\/IFIP international conference on hardware\/software codesign and system synthesis. Association for computing machinery, New York, NY, USA, CODES \u201916, https:\/\/doi.org\/10.1145\/2968456.2968476,","DOI":"10.1145\/2968456.2968476"},{"key":"9407_CR39","unstructured":"Paszke A, Gross S, Massa F, et\u00a0al (2019) PyTorch: an imperative style, high-performance deep learning library. In: Wallach H, Larochelle H, Beygelzimer A, et\u00a0al (eds) Advances in neural information processing systems 32. p 8024\u20138035"},{"key":"9407_CR40","first-page":"1","volume":"13","author":"H Pearce","year":"2020","unstructured":"Pearce H, Yang X, Roop PS et al (2020) Designing neural networks for real-time systems. IEEE Embed Syst Lett 13:1\u20131","journal-title":"IEEE Embed Syst Lett"},{"key":"9407_CR41","doi-asserted-by":"publisher","DOI":"10.1145\/3549526","author":"J Perez-Cerrolaza","year":"2022","unstructured":"Perez-Cerrolaza J, Abella J, Kosmidis L et al (2022) GPU devices for safety-critical systems: a survey. ACM Comput Surv. https:\/\/doi.org\/10.1145\/3549526","journal-title":"ACM Comput Surv"},{"key":"9407_CR42","unstructured":"Pompougnac H, Beaugnon U, Cohen A, et\u00a0al (2020) From SSA to synchronous concurrency and back. Research report RR-9380, INRIA Sophia Antipolis - M\u00e9diterran\u00e9e (France), URL https:\/\/hal.inria.fr\/hal-03043623"},{"key":"9407_CR43","doi-asserted-by":"crossref","unstructured":"Pujol R, Jorba J, Tabani H, et\u00a0al (2022) Vector extensions in cots processors to increase guaranteed performance in real-time systems. ACM Trans Embed Comput Syst","DOI":"10.1145\/3561054"},{"issue":"4","key":"9407_CR44","first-page":"1595","volume":"34","author":"PP Ray","year":"2022","unstructured":"Ray PP (2022) A review on tinyml: state-of-the-art and prospects. J King Saud Univ Comput Inf Sci 34(4):1595\u20131623","journal-title":"J King Saud Univ Comput Inf Sci"},{"key":"9407_CR45","unstructured":"RTCA\/EUROCAE (2011) DO-178C\/ED-12C - Software considerations in airborne systems and equipment certification"},{"issue":"9","key":"9407_CR46","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1016\/j.sysarc.2015.04.002","volume":"61","author":"M Schoeberl","year":"2015","unstructured":"Schoeberl M, Abbaspour S, Akesson B et al (2015) T-crest: time-predictable multi-core architecture for embedded systems. J Syst Archit 61(9):449\u2013471","journal-title":"J Syst Archit"},{"key":"9407_CR47","doi-asserted-by":"crossref","unstructured":"Sentieys O, Filip S, Briand D, et\u00a0al (2021) Adequatedl: approximating deep learning accelerators. In: 24th International symposium on design and diagnostics of electronic circuits systems (DDECS 21)","DOI":"10.1109\/DDECS52668.2021.9417026"},{"key":"9407_CR48","unstructured":"Silva IDA, Carle T, Gauffriau A, et\u00a0al (2022) ACETONE: predictable programming framework for ML applications in safety-critical systems. In: 34th Euromicro conference on real-time systems, ECRTS 2022, July 5-8, 2022, Modena, Italy, pp 3:1\u20133:19"},{"key":"9407_CR49","unstructured":"Stahl R (2021) $$\\mu$$TVM StaticRT CodeGen. URL https:\/\/github.com\/tum-ei-eda\/utvm_staticrt_codegen"},{"key":"9407_CR50","unstructured":"TensorFlow (2022) Simple audio recognition: recognizing keywords. URL https:\/\/www.tensorflow.org\/tutorials\/audio\/simple_audio"},{"key":"9407_CR51","unstructured":"Texas Instruments (2013) TCI6630K2L Multicore DSP+ARM KeyStone II System-on-Chip. Tech. Rep. SPRS893E, Texas Instruments Incorporated"},{"key":"9407_CR52","unstructured":"The Khronos NNEF Working Group (2018) Neural network exchange format"},{"key":"9407_CR53","doi-asserted-by":"publisher","DOI":"10.1145\/3570641","author":"N Tollenaere","year":"2022","unstructured":"Tollenaere N, Iooss G, Pouget S et al (2022) Autotuning convolutions is easier than you think. ACM Trans Archit Code Optim. https:\/\/doi.org\/10.1145\/3570641","journal-title":"ACM Trans Archit Code Optim"},{"issue":"3","key":"9407_CR54","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/2764454","volume":"41","author":"FG Van Zee","year":"2015","unstructured":"Van Zee FG, van de Geijn RA (2015) BLIS: a framework for rapidly instantiating BLAS functionality. ACM Trans Math Softw 41(3):1\u201333","journal-title":"ACM Trans Math Softw"},{"key":"9407_CR55","unstructured":"Warden P (2018) Speech commands: a dataset for limited-vocabulary speech recognition. CoRR arXiv:abs\/1804.03209"},{"issue":"1\u20132","key":"9407_CR56","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/s0167-8191(00)00087-9","volume":"27","author":"RC Whaley","year":"2001","unstructured":"Whaley RC, Petitet A, Dongarra JJ (2001) Automated empirical optimizations of software and the ATLAS project. Parallel Comput 27(1\u20132):3\u201335. https:\/\/doi.org\/10.1016\/s0167-8191(00)00087-9","journal-title":"Parallel Comput"},{"key":"9407_CR57","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1347375.1347389","volume":"7","author":"R Wilhelm","year":"2008","unstructured":"Wilhelm R, Engblom J, Ermedahl A et al (2008) The worst-case execution-time problem-overview of methods and survey of tools. ACM Trans Embed Comput Syst 7:1\u201353","journal-title":"ACM Trans Embed Comput Syst"},{"key":"9407_CR58","unstructured":"Xianyi Z, Qian W, Saar W (2011) Openblas: an optimized BLAS library. URL https:\/\/www.openblas.net\/"},{"key":"9407_CR59","unstructured":"Zhang J, Franchetti F, Low TM (2018) High performance zero-memory overhead direct convolutions. In: Dy J, Krause A (eds) Proceedings of the 35th international conference on machine learning, pp 5776\u20135785, URl https:\/\/proceedings.mlr.press\/v80\/zhang18d.html"},{"key":"9407_CR60","doi-asserted-by":"publisher","unstructured":"Zheng L, Jia C, Sun M, et\u00a0al (2020) Ansor : generating high-performance tensor programs for deep learning. https:\/\/doi.org\/10.48550\/ARXIV.2006.06762, arXiv:org\/abs\/2006.06762","DOI":"10.48550\/ARXIV.2006.06762"}],"container-title":["Real-Time Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11241-023-09407-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11241-023-09407-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11241-023-09407-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,8]],"date-time":"2023-09-08T20:34:30Z","timestamp":1694205270000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11241-023-09407-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,28]]},"references-count":60,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,9]]}},"alternative-id":["9407"],"URL":"https:\/\/doi.org\/10.1007\/s11241-023-09407-z","relation":{},"ISSN":["0922-6443","1573-1383"],"issn-type":[{"type":"print","value":"0922-6443"},{"type":"electronic","value":"1573-1383"}],"subject":[],"published":{"date-parts":[[2023,8,28]]},"assertion":[{"value":"26 July 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 August 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}