{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:44:26Z","timestamp":1766220266091,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3754598.3754661","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:34:32Z","timestamp":1766219672000},"page":"774-783","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Optimizing NumPy with SVE Acceleration on ARM Architectures"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1401-4876","authenticated-orcid":false,"given":"Kuldeep","family":"Pal","sequence":"first","affiliation":[{"name":"CDAC, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9676-8796","authenticated-orcid":false,"given":"Aniket P.","family":"Garade","sequence":"additional","affiliation":[{"name":"CDAC, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7836-7037","authenticated-orcid":false,"given":"Deepika H.","family":"V","sequence":"additional","affiliation":[{"name":"CDAC, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1727-556X","authenticated-orcid":false,"given":"Haribabu","family":"P","sequence":"additional","affiliation":[{"name":"CDAC, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3141-077X","authenticated-orcid":false,"given":"S. A.","family":"Kumar","sequence":"additional","affiliation":[{"name":"CDAC, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5510-2276","authenticated-orcid":false,"given":"S. D.","family":"Sudarsan","sequence":"additional","affiliation":[{"name":"CDAC, Bengaluru, India"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.5555\/323215"},{"key":"e_1_3_3_1_3_2","volume-title":"Arm\u00ae Architecture Reference Manual Supplement: The Scalable Vector Extension (SVE), for Armv8-A","author":"Limited Arm","year":"2021","unstructured":"Arm Limited. 2021. Arm\u00ae Architecture Reference Manual Supplement: The Scalable Vector Extension (SVE), for Armv8-A. https:\/\/developer.arm.com\/documentation\/ddi0584\/latest\/"},{"key":"e_1_3_3_1_4_2","unstructured":"Arm Limited. 2023. Comparison of ARM Neon and SVE. https:\/\/developer.arm.com\/documentation"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","unstructured":"A. Armejach H. Caminal J.M. Cebrian et\u00a0al. 2020. Using Arm\u2019s Scalable Vector Extension on Stencil Codes. Journal of Supercomputing 76 (2020) 2039\u20132062. 10.1007\/s11227-019-02842-5","DOI":"10.1007\/s11227-019-02842-5"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","unstructured":"Berenger Bramas. 2021. A Fast Vectorized Sorting Implementation Based on the ARM Scalable Vector Extension (SVE). PeerJ Computer Science 7 (2021) e769. 10.7717\/peerj-cs.769","DOI":"10.7717\/peerj-cs.769"},{"key":"e_1_3_3_1_7_2","unstructured":"Bine Brank. 2023. Vector Length Agnostic SIMD Parallelism on Modern Processor Architectures with the Focus on Arm\u2019s SVE. Ph.\u00a0D. Dissertation. Ph. D. thesis Bergische Universit\u00e4t Wuppertal."},{"key":"e_1_3_3_1_8_2","volume-title":"Scientific Computing with Python: High-performance scientific computing with NumPy, SciPy, and pandas","author":"Fuhrer Claus","year":"2021","unstructured":"Claus Fuhrer, Jan\u00a0Erik Solem, and Olivier Verdier. 2021. Scientific Computing with Python: High-performance scientific computing with NumPy, SciPy, and pandas. Packt Publishing Ltd."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC62836.2024.10938457"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Kazushige Goto and Robert Van De\u00a0Geijn. 2008. High-performance implementation of the level-3 BLAS. ACM Transactions on Mathematical Software (TOMS) 35 1 (2008) 1\u201314.","DOI":"10.1145\/1377603.1377607"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00024"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","unstructured":"Charles\u00a0R. Harris K.\u00a0Jarrod Millman St\u00e9fan\u00a0J. van\u00a0der Walt Ralf Gommers Pauli Virtanen David Cournapeau Eric Wieser Julian Taylor Sebastian Berg Nathaniel\u00a0J. Smith Robert Kern Matti Picus Stephan Hoyer Marten\u00a0H. van Kerkwijk Matthew Brett Allan Haldane Jaime\u00a0Fern\u00e1ndez del R\u00edo Mark Wiebe Pearu Peterson Pierre G\u00e9rard-Marchant Kevin Sheppard Tyler Reddy Warren Weckesser Hameer Abbasi Christoph Gohlke and Travis\u00a0E. Oliphant. 2020. Array Programming with NumPy. Nature 585 7825 (2020) 357\u2013362. 10.1038\/s41586-020-2649-2","DOI":"10.1038\/s41586-020-2649-2"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","unstructured":"Justin Holewinski Ragavendar Ramamurthi Mahesh Ravishankar Naznin Fauzia Louis-No\u00ebl Pouchet Atanas Rountev and P. Sadayappan. 2012. Dynamic Trace-Based Analysis of Vectorization Potential of Applications. SIGPLAN Notices 47 6 (2012) 371\u2013382. 10.1145\/2345156.2254108","DOI":"10.1145\/2345156.2254108"},{"key":"e_1_3_3_1_14_2","unstructured":"Intel Corporation and Arm Ltd.2023. Intel\u00ae AVX-512 Instructions and Arm Neon\u2122 Technology. https:\/\/www.intel.com\/content\/www\/us\/en\/develop\/documentation\/cpp-compiler-developer-guide-and-reference\/top\/compiler-reference\/intrinsics\/intrinsics-for-intel-advanced-vector-extensions-512-intel-avx-512-instructions.html"},{"key":"e_1_3_3_1_15_2","unstructured":"Arm Ltd.2022. High Performance Computing with Arm: Scaling SVE. https:\/\/www.arm.com\/products\/silicon-ip-cpu\/neoverse\/neoverse-v1."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614255"},{"key":"e_1_3_3_1_17_2","unstructured":"NumPy Community. 2024. Tracking Issue for ARM SVE Support in NumPy. https:\/\/github.com\/numpy\/numpy\/issues\/18239."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Evann Regnault and Berenger Bramas. 2023. SPC5: an Efficient SpMV Framework Vectorized Using ARM SVE and x86 AVX-512. arxiv:https:\/\/arXiv.org\/abs\/2307.14774\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2307.14774","DOI":"10.2298\/CSIS230819005R"},{"key":"e_1_3_3_1_19_2","unstructured":"Google Research. 2023. Highway: Fast Portable SIMD via Runtime Dispatch. https:\/\/github.com\/google\/highway."},{"key":"e_1_3_3_1_20_2","unstructured":"Chandan Sharma Rakshith GB Ajay\u00a0Kumar Patel Dhanus\u00a0M Lal Darshan Patel Ragesh Hajela Masahiro Doteguchi and Priyanka Sharma. 2025. oneDAL Optimization for ARM Scalable Vector Extension: Maximizing Efficiency for High-Performance Data Science. arxiv:https:\/\/arXiv.org\/abs\/2504.04241\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2504.04241"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","unstructured":"Naoki Shibata and Francesco Petrogalli. 2020. SLEEF: A Portable Vectorized Library of C Standard Mathematical Functions. IEEE Transactions on Parallel and Distributed Systems 31 6 1316\u20131327. 10.1109\/tpds.2019.2960333","DOI":"10.1109\/tpds.2019.2960333"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","unstructured":"Nigel Stephens Stuart Biles Matthias Boettcher Jacob Eapen Mbou Eyole Giacomo Gabrielli Matt Horsnell Grigorios Magklis Alejandro Martinez Nathanael Premillieu Alastair Reid Alejandro Rico and Paul Walker. 2017. The ARM Scalable Vector Extension. IEEE Micro 37 2 (March 2017) 26\u201339. 10.1109\/MM.2017.35","DOI":"10.1109\/MM.2017.35"},{"key":"e_1_3_3_1_23_2","unstructured":"NumPy\u00a0Core Team. 2023. Discussion: SIMD and ARM SVE Support. https:\/\/github.com\/numpy\/numpy\/issues\/17060."},{"key":"e_1_3_3_1_24_2","unstructured":"Pauli Virtanen and NumPy Developers. 2023. NumPy SIMD Optimizations and ARM SVE. https:\/\/github.com\/numpy\/numpy\/pull\/18085."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS56603.2022.00073"},{"key":"e_1_3_3_1_26_2","unstructured":"Margaret Wright and al.2010. The opportunities and challenges of exascale computing. (2010)."}],"event":{"name":"ICPP '25: 54th International Conference on Parallel Processing","location":"San Diego CA USA","acronym":"ICPP '25"},"container-title":["Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3754598.3754661","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:39:35Z","timestamp":1766219975000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3754598.3754661"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":25,"alternative-id":["10.1145\/3754598.3754661","10.1145\/3754598"],"URL":"https:\/\/doi.org\/10.1145\/3754598.3754661","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}