{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:40:13Z","timestamp":1755870013701,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,8]]},"DOI":"10.1145\/3721145.3725778","type":"proceedings-article","created":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:57:17Z","timestamp":1755867437000},"page":"611-624","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["UJOpt: Heuristic Approach for Applying Unroll-and-Jam Optimization and Loop Order Selection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9114-0409","authenticated-orcid":false,"given":"Shilpa","family":"Babalad","sequence":"first","affiliation":[{"name":"Indian Institute of Science, Bengaluru, Karnataka, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7202-6860","authenticated-orcid":false,"given":"Shirish K","family":"Shevade","sequence":"additional","affiliation":[{"name":"Indian Institute of Science, Bengaluru, Karnataka, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1550-8121","authenticated-orcid":false,"given":"Matthew Jacob","family":"Thazhuthaveetil","sequence":"additional","affiliation":[{"name":"Indian Institute of Science, Bengaluru, Karnataka, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2517-9994","authenticated-orcid":false,"given":"R","family":"Govindarajan","sequence":"additional","affiliation":[{"name":"Indian Institute of Science, Bengaluru, Karnataka, India"}]}],"member":"320","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2006.37"},{"key":"e_1_3_3_2_3_2","unstructured":"Kahraman Akdemir Martin Dixon Wajdi Feghali Patrick Fay Vinodh Gopal Jim Guilford Erdinc Ozturk Gil Wolrich and Ronen Zohar. 2010. Breakthrough AES performance with intel AES new instructions. White paper June 12 (2010) 217."},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/502874.502897"},{"key":"e_1_3_3_2_5_2","volume-title":"4th Gen AMD Processor Architecture","year":"2024","unstructured":"AMD. 2024. 4th Gen AMD Processor Architecture. Technical Report. AMD."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628092"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Mohamed Arafa Bahaa Fahim Sailesh Kottapalli Akhilesh Kumar Lily\u00a0P Looi Sreenivas Mandava Andy Rudoff Ian\u00a0M Steiner Bob Valentine Geetha Vedaraman et\u00a0al. 2019. Cascade lake: Next generation intel xeon scalable processor. IEEE Micro 39 2 (2019) 29\u201336.","DOI":"10.1109\/MM.2019.2899330"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Amir\u00a0H Ashouri Andrea Bignoli Gianluca Palermo Cristina Silvano Sameer Kulkarni and John Cavazos. 2017. Micomp: Mitigating the compiler phase-ordering problem using optimization sub-sequences and machine learning. ACM Transactions on Architecture and Code Optimization (TACO) 14 3 (2017) 1\u201328.","DOI":"10.1145\/3124452"},{"key":"e_1_3_3_2_9_2","unstructured":"Shilpa Babalad Shirish\u00a0K Shevade Matthew\u00a0Jacob Thazhuthaveetil and R Govindarajan. 2023. A Machine Learning Approach to Identify the Best-Performing Loop Order. https:\/\/github.com\/knightlander2023\/OptLoopOrder Technical Report Department of Computer Science and Automation Indian Institute of Science Bengaluru."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"David\u00a0F Bacon Susan\u00a0L Graham and Oliver\u00a0J Sharp. 1994. Compiler transformations for high-performance computing. ACM Computing Surveys (CSUR) 26 4 (1994) 345\u2013420.","DOI":"10.1145\/197405.197406"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"Ravi Bhargava and Kai Troester. 2024. AMD Next Generation\" Zen 4\" Core and 4 th Gen AMD EPYC\u2122 Server CPUs. IEEE Micro (2024).","DOI":"10.1109\/MM.2024.3375070"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-78791-4_9"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/1375581.1375595"},{"key":"e_1_3_3_2_14_2","first-page":"578","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, et\u00a0al. 2018. { TVM} : An automated { End-to-End} optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). 578\u2013594."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Corinna Cortes and Vladimir Vapnik. 1995. Support-vector networks. Machine learning 20 3 (1995) 273\u2013297.","DOI":"10.1023\/A:1022627411411"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTR.2007.4629247"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Sylvain Girbal Nicolas Vasilache C\u00e9dric Bastoul Albert Cohen David Parello Marc Sigler and Olivier Temam. 2006. Semi-automatic composition of loop transformations for deep parallelism and memory hierarchies. International Journal of Parallel Programming 34 3 (2006) 261\u2013317.","DOI":"10.1007\/s10766-006-0012-3"},{"key":"e_1_3_3_2_18_2","first-page":"1","volume-title":"Proceedings of the First International Workshop on Polyhedral Compilation Techniques (IMPACT)","volume":"2011","author":"Grosser Tobias","year":"2011","unstructured":"Tobias Grosser, Hongbin Zheng, Raghesh Aloor, Andreas Simb\u00fcrger, Armin Gr\u00f6\u00dflinger, and Louis-No\u00ebl Pouchet. 2011. Polly-Polyhedral optimization in LLVM. In Proceedings of the First International Workshop on Polyhedral Compilation Techniques (IMPACT) , Vol.\u00a02011. 1."},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3368826.3377928"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462187"},{"key":"e_1_3_3_2_21_2","unstructured":"Max Kuhn Steve Weston Mark Culp Nathan Coulter Ross Quinlan et\u00a0al. 2015. Package \u2018C50\u2019. CRAN UTC (2015)."},{"key":"e_1_3_3_2_22_2","first-page":"1","volume-title":"The BSD conference","author":"Lattner Chris","year":"2008","unstructured":"Chris Lattner. 2008. LLVM and Clang: Next generation compiler technology. In The BSD conference , Vol.\u00a05. 1\u201320."},{"key":"e_1_3_3_2_23_2","unstructured":"David Meyer Evgenia Dimitriadou Kurt Hornik Andreas Weingessel Friedrich Leisch Chih-Chung Chang Chih-Chen Lin and Maintainer\u00a0David Meyer. 2019. Package \u2018e1071\u2019. The R Journal (2019)."},{"key":"e_1_3_3_2_24_2","unstructured":"David Meyer and FT Wien. 2015. Support vector machines. The Interface to libsvm in package e1071 28 (2015) 20."},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/1454115.1454119"},{"key":"e_1_3_3_2_26_2","unstructured":"LN Pouchet. 2012. Polybench: The polyhedral benchmark suite. http:\/\/www. cs. ucla. edu\/pouchet\/software\/polybench."},{"key":"e_1_3_3_2_27_2","unstructured":"LN Pouchet and Scott Grauer-Gray. 2011. PolyBench: The Polyhedral Benchmark suite (2011) Version 3.2. http:\/\/www-roc. inria. fr\/\u00a0 pouchet\/software\/polybench."},{"key":"e_1_3_3_2_28_2","unstructured":"Louis-No\u00ebl Pouchet C. Bastoul and U. Bondhugula. 2019. PoCC: the polyhedral compiler collection. http:\/\/web.cs.ucla.edu\/\u00a0pouchet\/software\/pocc\/."},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"crossref","unstructured":"Louis-No\u00ebl Pouchet Uday Bondhugula C\u00e9dric Bastoul Albert Cohen Jagannathan Ramanujam Ponnuswamy Sadayappan and Nicolas Vasilache. 2011. Loop transformations: convexity pruning and optimization. ACM SIGPLAN Notices 46 1 (2011) 549\u2013562.","DOI":"10.1145\/1925844.1926449"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Kishore\u00a0Kumar Pusukuri Rajiv Gupta and Laxmi\u00a0N Bhuyan. 2012. Thread tranquilizer: Dynamically reducing performance variation. ACM Transactions on Architecture and Code Optimization (TACO) 8 4 (2012) 1\u201321.","DOI":"10.1145\/2086696.2086725"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"crossref","unstructured":"Peter\u00a0J Rousseeuw and Mia Hubert. 2011. Robust statistics for outlier detection. Wiley interdisciplinary reviews: Data mining and knowledge discovery 1 1 (2011) 73\u201379.","DOI":"10.1002\/widm.2"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/335231.335246"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3168823"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2005.29"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Kevin Stock Louis-No\u00ebl Pouchet and P Sadayappan. 2012. Using machine learning to improve automatic vectorization. ACM Transactions on Architecture and Code Optimization (TACO) 8 4 (2012) 1\u201323.","DOI":"10.1145\/2086696.2086729"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2009.18"},{"key":"e_1_3_3_2_37_2","volume-title":"The definitive guide to GCC","author":"Von\u00a0Hagen William","year":"2011","unstructured":"William Von\u00a0Hagen. 2011. The definitive guide to GCC. Apress."},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.5555\/645818.669220"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.5555\/353939"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"crossref","unstructured":"Qing Yi and Ken Kennedy. 2004. Improving memory hierarchy performance through combined loop interchange and multi-level fusion. The International Journal of High Performance Computing Applications 18 2 (2004) 237\u2013253.","DOI":"10.1177\/1094342004038956"}],"event":{"name":"ICS '25: 2025 International Conference on Supercomputing","location":"Salt Lake City USA","acronym":"ICS '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 39th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721145.3725778","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:02:01Z","timestamp":1755867721000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721145.3725778"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,8]]},"references-count":39,"alternative-id":["10.1145\/3721145.3725778","10.1145\/3721145"],"URL":"https:\/\/doi.org\/10.1145\/3721145.3725778","relation":{},"subject":[],"published":{"date-parts":[[2025,6,8]]},"assertion":[{"value":"2025-08-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}