{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:40:12Z","timestamp":1755870012885,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFB3001501"],"award-info":[{"award-number":["2023YFB3001501"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62322201, U23B2020"],"award-info":[{"award-number":["62322201, U23B2020"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["YWF-23-L-1121, JKF-20240198, JK2024-58"],"award-info":[{"award-number":["YWF-23-L-1121, JKF-20240198, JK2024-58"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,8]]},"DOI":"10.1145\/3721145.3725768","type":"proceedings-article","created":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:57:17Z","timestamp":1755867437000},"page":"265-278","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Accelerating Complex Stencil Computations with Adaptive Fusion Strategy"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-0761-3287","authenticated-orcid":false,"given":"Siqi","family":"Wang","sequence":"first","affiliation":[{"name":"Beihang University, Beijing, - None -, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1101-7927","authenticated-orcid":false,"given":"Hailong","family":"Yang","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, - None -, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7197-2873","authenticated-orcid":false,"given":"Pengbo","family":"Wang","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, - None -, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0540-0959","authenticated-orcid":false,"given":"Shaokang","family":"Du","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, - None -, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7787-6460","authenticated-orcid":false,"given":"Yufan","family":"Xu","sequence":"additional","affiliation":[{"name":"Independent Researcher, Cupertino, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2927-362X","authenticated-orcid":false,"given":"Qingxiao","family":"Sun","sequence":"additional","affiliation":[{"name":"China University of Petroleum, Beijing, Beijing, - None -, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1453-6642","authenticated-orcid":false,"given":"Xiaoyan","family":"Liu","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, - None -, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7293-545X","authenticated-orcid":false,"given":"Xuezhu","family":"Wang","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, - None -, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7275-201X","authenticated-orcid":false,"given":"Xuning","family":"Liang","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, - None -, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7186-0556","authenticated-orcid":false,"given":"Zhongzhi","family":"Luan","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, - None -, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1829-2817","authenticated-orcid":false,"given":"Yi","family":"Liu","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5382-1473","authenticated-orcid":false,"given":"Depei","family":"Qian","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, - None -, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Anton Afanasyev Mauro Bianco Lukas Mosimann Carlos Osuna Felix Thaler Hannes Vogt Oliver Fuhrer Joost VandeVondele and Thomas\u00a0C Schulthess. 2021. GridTools: A framework for portable weather and climate applications. SoftwareX 15 (2021) 100707.","DOI":"10.1016\/j.softx.2021.100707"},{"key":"e_1_3_3_1_3_2","unstructured":"Krste Asanovic Ras Bodik Bryan\u00a0Christopher Catanzaro Joseph\u00a0James Gebis Parry Husbands Kurt Keutzer David\u00a0A Patterson William\u00a0Lester Plishker John Shalf Samuel\u00a0Webb Williams and Katherine\u00a0A Yelick. 2006. The landscape of parallel computing research: A view from berkeley. (2006)."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/1375581.1375595"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2017.102"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370315"},{"key":"e_1_3_3_1_7_2","unstructured":"GFDL. 2020. FV3: Finite-VolumeCubed-SphereDynamicalCore https:\/\/www.gfdl.noaa.gov\/fv3\/."},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"GP Glinski C Bailey and KA Pericleous. 2001. A non-Newtonian computational fluid dynamics study of the stencil printing process. Proceedings of the Institution of Mechanical Engineers Part C: Journal of Mechanical Engineering Science 215 4 (2001) 437\u2013446.","DOI":"10.1243\/0954406011520869"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.5555\/2342788.2342798"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Tobias Gysi Christoph M\u00fcller Oleksandr Zinenko Stephan Herhut Eddie Davis Tobias Wicky Oliver Fuhrer Torsten Hoefler and Tobias Grosser. 2021. Domain-specific multi-level IR rewriting for GPU: The Open Earth compiler for GPU-accelerated climate simulation. ACM Transactions on Architecture and Code Optimization (TACO) 18 4 (2021) 1\u201323.","DOI":"10.1145\/3469030"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807627"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3168824"},{"key":"e_1_3_3_1_13_2","unstructured":"Will Hamilton Zhitao Ying and Jure Leskovec. 2017. Inductive representation learning on large graphs. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"HT Huynh Zhi\u00a0J Wang and Peter\u00a0E Vincent. 2014. High-order methods for computational fluid dynamics: A brief review of compact differential formulations on unstructured grids. Computers & fluids 98 (2014) 209\u2013220.","DOI":"10.1016\/j.compfluid.2013.12.007"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"e_1_3_3_1_16_2","unstructured":"Thomas\u00a0N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1609.02907 (2016)."},{"key":"e_1_3_3_1_17_2","unstructured":"Jakub Lichman. 2020. Learning From Generated Stencil Programs. Master\u2019s thesis. ETH Zurich."},{"key":"e_1_3_3_1_18_2","first-page":"881","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Ma Lingxiao","year":"2020","unstructured":"Lingxiao Ma, Zhiqiang Xie, Zhi Yang, Jilong Xue, Youshan Miao, Wei Cui, Wenxiang Hu, Fan Yang, Lintao Zhang, and Lidong Zhou. 2020. Rammer: Enabling holistic deep learning compiler optimizations with { rTasks}. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). 881\u2013897."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3368826.3377904"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Ravi\u00a0Teja Mullapudi Vinay Vasista and Uday Bondhugula. 2015. Polymage: Automatic optimization for image processing pipelines. ACM SIGARCH Computer Architecture News 43 1 (2015) 429\u2013443.","DOI":"10.1145\/2786763.2694364"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.2"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454083"},{"key":"e_1_3_3_1_23_2","unstructured":"NVIDIA. 2012. NVIDIA: Sharing a gpu between mpi processes: multiple-process service https:\/\/docs.nvidia.com\/deploy\/mps\/index.html."},{"key":"e_1_3_3_1_24_2","unstructured":"NVIDIA. 2022. NVIDIA: Nvidia multi-instance gpu user guide https:\/\/docs.nvidia.com\/datacenter\/tesla\/mig-user-guide."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Sreepathi Pai Matthew\u00a0J Thazhuthaveetil and Ramaswamy Govindarajan. 2013. Improving GPGPU concurrency with elastic kernels. ACM SIGARCH Computer Architecture News 41 1 (2013) 407\u2013418.","DOI":"10.1145\/2490301.2451160"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Jonathan Ragan-Kelley Connelly Barnes Andrew Adams Sylvain Paris Fr\u00e9do Durand and Saman Amarasinghe. 2013. Halide: a language and compiler for optimizing parallelism locality and recomputation in image processing pipelines. Acm Sigplan Notices 48 6 (2013) 519\u2013530.","DOI":"10.1145\/2499370.2462176"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Prashant\u00a0Singh Rawat Miheer Vaidya Aravind Sukumaran-Rajam Mahesh Ravishankar Vinod Grover Atanas Rountev Louis-No\u00ebl Pouchet and Ponnuswamy Sadayappan. 2018. Domain-specific optimization and generation of high-performance GPU code for stencil computations. Proc. IEEE 106 11 (2018) 1902\u20131920.","DOI":"10.1109\/JPROC.2018.2862896"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00073"},{"key":"e_1_3_3_1_29_2","unstructured":"U Sch\u00e4ttler G Doms and C Schraff. 2008. A description of the nonhydrostatic regional COSMO-model part VII: user\u2019s guide. Deutscher Wetterdienst Rep. COSMO-Model 4 (2008) 142."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/FPL50879.2020.00014"},{"key":"e_1_3_3_1_31_2","unstructured":"STC. 2020. Consortium for Small-scale Modeling http:\/\/www.cosmo-model.org\/."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00037"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00090"},{"key":"e_1_3_3_1_34_2","unstructured":"Petar Veli\u010dkovi\u0107 Guillem Cucurull Arantxa Casanova Adriana Romero Pietro Lio and Yoshua Bengio. 2017. Graph attention networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1710.10903 (2017)."},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.21"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446078"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"crossref","unstructured":"Yu Xie Chuanyu Yao Maoguo Gong Cheng Chen and A\u00a0Kai Qin. 2020. Graph convolutional networks with multi-level coarsening for graph classification. Knowledge-Based Systems 194 (2020) 105578.","DOI":"10.1016\/j.knosys.2020.105578"},{"key":"e_1_3_3_1_38_2","unstructured":"Keyulu Xu Weihua Hu Jure Leskovec and Stefanie Jegelka. 2018. How powerful are graph neural networks? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.00826 (2018)."},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33017370"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476149"},{"key":"e_1_3_3_1_41_2","unstructured":"Jie Zhao Xiong Gao Ruijie Xia Zhaochuang Zhang Deshi Chen Lei Chen Renwei Zhang Zhen Geng Bin Cheng and Xuefeng Jin. 2022. Apollo: Automatic partition-based operator fusion through layer by layer optimization. Proceedings of Machine Learning and Systems 4 (2022) 1\u201319."},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356210"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507723"}],"event":{"name":"ICS '25: 2025 International Conference on Supercomputing","location":"Salt Lake City USA","acronym":"ICS '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 39th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721145.3725768","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:01:07Z","timestamp":1755867667000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721145.3725768"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,8]]},"references-count":42,"alternative-id":["10.1145\/3721145.3725768","10.1145\/3721145"],"URL":"https:\/\/doi.org\/10.1145\/3721145.3725768","relation":{},"subject":[],"published":{"date-parts":[[2025,6,8]]},"assertion":[{"value":"2025-08-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}