{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:07:40Z","timestamp":1755907660822,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":69,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T00:00:00Z","timestamp":1740700800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,2,28]]},"DOI":"10.1145\/3710848.3710886","type":"proceedings-article","created":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T06:20:57Z","timestamp":1740723657000},"page":"481-495","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Jigsaw: Toward Conflict-free Vectorized Stencil Computation by Tessellating Swizzled Registers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2050-572X","authenticated-orcid":false,"given":"Yiwei","family":"Zhang","sequence":"first","affiliation":[{"name":"University of Chinese Academy of Sciences, Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1013-1325","authenticated-orcid":false,"given":"Kun","family":"Li","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3406-2907","authenticated-orcid":false,"given":"Liang","family":"Yuan","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-3075-3385","authenticated-orcid":false,"given":"Haozhi","family":"Han","sequence":"additional","affiliation":[{"name":"Peking University, Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7520-9640","authenticated-orcid":false,"given":"Yunquan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9107-013X","authenticated-orcid":false,"given":"Ting","family":"Cao","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6455-3898","authenticated-orcid":false,"given":"Mao","family":"Yang","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,2,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3409964.3461803"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS55109.2022.00010"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/29873.29875"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1562764.1562783"},{"key":"e_1_3_2_1_5_1","unstructured":"Krste Asanovi\u0107 Ras Bodik Bryan Christopher Catanzaro Joseph James Gebis Parry Husbands Kurt Keutzer David A. Patterson William Lester Plishker John Shalf Samuel Webb Williams and Katherine A. Yelick. 2006. The Landscape of Parallel Computing Research: A View from Berkeley. Technical Report UCB\/EECS-2006-183. http:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2006\/EECS-2006-183.html"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.107"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.103"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1375581.1375595"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751224"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356162"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627535.3638476"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","unstructured":"Matthias Christen Olaf Schenk and Helmar Burkhart. 2011. PATUS: A Code Generation and Autotuning Framework for Parallel Iterative Stencil Computations on Modern Microarchitectures. In 2011 IEEE International Parallel & Distributed Processing Symposium. 676--687. https:\/\/doi.org\/10.1109\/IPDPS.2011.70","DOI":"10.1109\/IPDPS.2011.70"},{"key":"e_1_3_2_1_13_1","volume-title":"SPEC2000","author":"Standard Performance Evaluation Corporation","year":"2024","unstructured":"Standard Performance Evaluation Corporation. [n.d.]. SPEC2000. Accessed: 2024-03-26."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2591006"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/377792.377807"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/582034.582084"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/996841.996853"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1088149.1088197"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2544137.2544160"},{"key":"e_1_3_2_1_20_1","volume-title":"URL: https:\/\/software. intel. com\/sites\/- landingpage. IntrinsicsGuide (18.05. 2018)","author":"Guide Intel Intrinsics","year":"2015","unstructured":"Intel Intrinsics Guide. 2015. URL: https:\/\/software. intel. com\/sites\/- landingpage. IntrinsicsGuide (18.05. 2018) (2015)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751223"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1356058.1356085"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-19861-8_13"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2464996.2467268"},{"key":"e_1_3_2_1_25_1","unstructured":"Intel. 2022. Intel\u00ae Advanced Vector Extensions 512 (Intel\u00ae AVX-512) - Permuting Data Within and Between AVX Registers. (2022). https:\/\/networkbuilders.intel.com\/solutionslibrary\/intel-avx-512-permuting-data-within-and-between-avx-registers-technology-guide"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/73560.73588"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00035"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/582034.582077"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2010.5470421"},{"key":"e_1_3_2_1_30_1","volume-title":"Allen","author":"Kennedy Ken","year":"2001","unstructured":"Ken Kennedy and John R. Allen. 2001. Optimizing compilers for modern architectures: a dependence-based approach. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462187"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/1250734.1250761"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/106972.106981"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2508834.2513149"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/349299.349320"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2002.1105970"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476154"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.68"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368826.3377904"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.2"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/1133981.1133997"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/1454115.1454119"},{"key":"e_1_3_2_1_43_1","unstructured":"Intel VTune Profiler. 2024. Intel. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/vtune-profiler. Accessed: 2024-08-01."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2002.1016667"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00049"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2000.10015"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/301618.301668"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007559022013"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2594291.2594342"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/1810085.1810096"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/1989493.1989508"},{"key":"e_1_3_2_1_52_1","article-title":"Intel\u00ae OpenMP C++\/Fortran Compiler for Hyper-Threading Technology: Implementation and Performance","volume":"6","author":"Tian Xinmin","year":"2002","unstructured":"Xinmin Tian, Aart Bik, Milind Girkar, Paul Grey, Hideki Saito, and Ernesto Su. 2002. Intel\u00ae OpenMP C++\/Fortran Compiler for Hyper-Threading Technology: Implementation and Performance. Intel Technology Journal 6, 1 (2002).","journal-title":"Intel Technology Journal"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/113445.113449"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/76263.76337"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1015460304860"},{"key":"e_1_3_2_1_56_1","volume-title":"IMPACT 2013 3","author":"Wonnacott David G","year":"2013","unstructured":"David G Wonnacott and Michelle Mills Strout. 2013. On the scalability of loop tiling techniques. IMPACT 2013 3 (2013)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2005.18"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","unstructured":"Charles Yount Josh Tobin Alexander Breuer and Alejandro Duran. 2016. YASK---Yet Another Stencil Kernel: A Framework for HPC Stencil Code-Generation and Tuning. In 2016 Sixth International Workshop on Domain-Specific Languages and High-Level Frameworks for High Performance Computing (WOLFHPC). 30--39. https:\/\/doi.org\/10.1109\/WOLFHPC.2016.08","DOI":"10.1109\/WOLFHPC.2016.08"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476149"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337835"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126920"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593705"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593716"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00059"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/2259016.2259037"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356210"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441598"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/P3HPC.2018.00009"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/2886101"}],"event":{"name":"PPoPP '25: The 30th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"],"location":"Las Vegas NV USA","acronym":"PPoPP '25"},"container-title":["Proceedings of the 30th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3710848.3710886","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3710848.3710886","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:16:48Z","timestamp":1755875808000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3710848.3710886"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,28]]},"references-count":69,"alternative-id":["10.1145\/3710848.3710886","10.1145\/3710848"],"URL":"https:\/\/doi.org\/10.1145\/3710848.3710886","relation":{},"subject":[],"published":{"date-parts":[[2025,2,28]]},"assertion":[{"value":"2025-02-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}