{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T15:55:02Z","timestamp":1780588502954,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,7]],"date-time":"2024-05-07T00:00:00Z","timestamp":1715040000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Italian Ministry of University and Research (MUR)","award":["ICAC Spoke 1"],"award-info":[{"award-number":["ICAC Spoke 1"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,7]]},"DOI":"10.1145\/3649153.3649210","type":"proceedings-article","created":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T10:21:29Z","timestamp":1719915689000},"page":"147-154","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["QR-PULP: Streamlining QR Decomposition for RISC-V Parallel Ultra-Low-Power Platforms"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-7092-6227","authenticated-orcid":false,"given":"Amirhossein","family":"Kiamarzi","sequence":"first","affiliation":[{"name":"Universit\u00e0 di Bologna, Bologna, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3698-8615","authenticated-orcid":false,"given":"Davide","family":"Rossi","sequence":"additional","affiliation":[{"name":"Universit\u00e0 di Bologna, Bologna, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9221-4633","authenticated-orcid":false,"given":"Giuseppe","family":"Tagliavini","sequence":"additional","affiliation":[{"name":"Universit\u00e0 di Bologna, Bologna, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,7,2]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. Intel FPGA High Level Synthesis Compiler Pro Edition. https:\/\/www.intel.com\/content\/www\/us\/en\/docs\/programmable\/683349\/22-3\/pro-edition-reference-manual.html. Accessed: 23-08-2023."},{"key":"e_1_3_2_1_2_1","unstructured":"[n.d.]. Intel\u00ae-Optimized Math Library for Numerical Computing on CPUs & GPUs. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/onemkl.html. Accessed: 22-08-2023."},{"key":"e_1_3_2_1_3_1","unstructured":"[n. d.]. PULP Platform. https:\/\/pulp-platform.org\/. Accessed: [2023]."},{"key":"e_1_3_2_1_4_1","unstructured":"[n. d.]. vitis-hls. https:\/\/docs.xilinx.com\/r\/en-US\/Vitis_Libraries\/index.html. Accessed: 23-08-2023."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.3390\/math11112461"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"E. Anderson Z. Bai C. Bischof S. Blackford J. Demmel J. Dongarra J. Du Croz A. Greenbaum S. Hammarling A. McKenney and D. Sorensen. 1999. LAPACK Users' Guide (third ed.). Society for Industrial and Applied Mathematics Philadelphia PA.","DOI":"10.1137\/1.9780898719604"},{"key":"e_1_3_2_1_7_1","unstructured":"ARM. 2010. Eigen v3. https:\/\/arm-software.github.io\/CMSIS-DSP\/main\/index.html."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2008.10.002"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.3306"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2011.10.002"},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Parallel and Distributed Computing: Applications and Technologies","author":"Fukaya Takeshi","unstructured":"Takeshi Fukaya. 2022. Distributed Parallel Tall-Skinny QR Factorization: Performance Evaluation of Various Algorithms on Various Systems. In International Conference on Parallel and Distributed Computing: Applications and Technologies. Springer, 275--287."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2017.2654506"},{"key":"e_1_3_2_1_13_1","volume-title":"Van Loan","author":"Golub Gene H.","year":"2012","unstructured":"Gene H. Golub and Charles F. Van Loan. 2012. Matrix Computations (4th ed.). Johns Hopkins University Press."},{"key":"e_1_3_2_1_14_1","unstructured":"Ga\u00ebl Guennebaud Beno\u00eet Jacob et al. 2010. Eigen v3. http:\/\/eigen.tuxfamily.org."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.22283\/qbs.2019.38.2.121"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.2197\/ipsjjip.27.831"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2018.2868930"},{"key":"e_1_3_2_1_18_1","volume-title":"Richards","author":"Kerr Andrew","year":"2009","unstructured":"Andrew Kerr, Dan Campbell, and Mark A. Richards. 2009. QR decomposition on GPUs. In GPGPU-2."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1080\/03081087.2016.1267104"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3174243.3174273"},{"key":"e_1_3_2_1_21_1","volume-title":"A block Cholesky-LU-based QR factorization for rectangular matrices. Numerical Linear Algebra with Applications","author":"Borne Sabine Le","year":"2023","unstructured":"Sabine Le Borne. 2023. A block Cholesky-LU-based QR factorization for rectangular matrices. Numerical Linear Algebra with Applications (2023), e2497."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2018.2803820"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.23919\/DATE56975.2023.10136916"},{"key":"e_1_3_2_1_24_1","first-page":"861","article-title":"High-throughput FPGA implementation of QR decomposition","volume":"62","author":"Mu\u00f1oz Sergio D","year":"2015","unstructured":"Sergio D Mu\u00f1oz and Javier Hormigo. 2015. High-throughput FPGA implementation of QR decomposition. IEEE Transactions on Circuits and Systems II: Express Briefs 62, 9 (2015), 861--865.","journal-title":"IEEE Transactions on Circuits and Systems II: Express Briefs"},{"key":"e_1_3_2_1_25_1","volume-title":"QR Factorization of Block Low-Rank Matrices on Multi-instance GPU. In International Conference on Parallel and Distributed Computing: Applications and Technologies. Springer, 359--369","author":"Ohshima Satoshi","year":"2022","unstructured":"Satoshi Ohshima, Akihiro Ida, Rio Yokota, and Ichitaro Yamazaki. 2022. QR Factorization of Block Low-Rank Matrices on Multi-instance GPU. In International Conference on Parallel and Distributed Computing: Applications and Technologies. Springer, 359--369."},{"key":"e_1_3_2_1_26_1","unstructured":"PULP open-source community. 2020. PULP-DSP. https:\/\/github.com\/pulp-platform\/pulp-dsp."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1002\/wcms.1614"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2021.3114881"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2968456.2974004"},{"key":"e_1_3_2_1_30_1","unstructured":"STMicroelectronics. 2020. STM32f4 Series. https:\/\/www.st.com\/resource\/en\/datasheet\/stm32f405rg.pdf"},{"key":"e_1_3_2_1_31_1","unstructured":"STMicroelectronics. 2020. STM32L4 Series. https:\/\/www.st.com\/en\/microcontrollersmicroprocessors\/stm32l4-series\/documentation.html"},{"key":"e_1_3_2_1_32_1","unstructured":"STMicroelectronics. 2023. STM32h7 Series. https:\/\/www.st.com\/resource\/en\/datasheet\/stm32h743vi.pdf"},{"key":"e_1_3_2_1_33_1","unstructured":"GreenWaves Technologies. 2021. GAP9: Low-power System-on-Chip for edge AI and IoT applications."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2019.102571"},{"key":"e_1_3_2_1_35_1","volume-title":"Matrix computations (Johns Hopkins studies in mathematical sciences). Matrix Computations 5","author":"Van Loan Charles F","year":"1996","unstructured":"Charles F Van Loan and G Golub. 1996. Matrix computations (Johns Hopkins studies in mathematical sciences). Matrix Computations 5 (1996)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/1596532.1596535"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3176671"}],"event":{"name":"CF '24: 21st ACM International Conference on Computing Frontiers","location":"Ischia Italy","acronym":"CF '24","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing"]},"container-title":["Proceedings of the 21st ACM International Conference on Computing Frontiers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649153.3649210","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649153.3649210","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:50:02Z","timestamp":1750287002000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649153.3649210"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,7]]},"references-count":37,"alternative-id":["10.1145\/3649153.3649210","10.1145\/3649153"],"URL":"https:\/\/doi.org\/10.1145\/3649153.3649210","relation":{},"subject":[],"published":{"date-parts":[[2024,5,7]]},"assertion":[{"value":"2024-07-02","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}