{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T17:30:16Z","timestamp":1771954216390,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T00:00:00Z","timestamp":1740441600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","award":["444127\\\/2024-0"],"award-info":[{"award-number":["444127\\\/2024-0"]}]},{"name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","award":["PrInt"],"award-info":[{"award-number":["PrInt"]}]},{"name":"FAPEMIG","award":["APQ-00440-23"],"award-info":[{"award-number":["APQ-00440-23"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,2,25]]},"DOI":"10.1145\/3708493.3712689","type":"proceedings-article","created":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T17:02:04Z","timestamp":1740502924000},"page":"117-127","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Fusion of Operators of Computational Graphs via Greedy Clustering: The XNNC Experience"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7882-0787","authenticated-orcid":false,"given":"Michael","family":"Canesche","sequence":"first","affiliation":[{"name":"Cadence Design Systems, Belo Horizonte, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8737-0252","authenticated-orcid":false,"given":"Vanderson Martins","family":"do Rosario","sequence":"additional","affiliation":[{"name":"Cadence Design Systems, San Jose, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1783-4231","authenticated-orcid":false,"given":"Edson","family":"Borin","sequence":"additional","affiliation":[{"name":"State University of Campinas, Campinas, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0375-1657","authenticated-orcid":false,"given":"Fernando Magno","family":"Quint\u00e3o Pereira","sequence":"additional","affiliation":[{"name":"Federal University of Minas Gerais, Belo Horizonte, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,2,25]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Xtensa Neural Network Compiler: Optimizer. IEEE ML Compiler Workshop","author":"Arbatov Volodymyr","year":"2019","unstructured":"Volodymyr Arbatov, Pedro Vaz Artigas, and Xianmin Chen. 2019. Xtensa Neural Network Compiler: Optimizer. IEEE ML Compiler Workshop"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197978"},{"key":"e_1_3_2_1_3_1","unstructured":"Riyadh Baghdadi Massinissa Merouani Mohamed-Hicham Leghettas Kamel Abdous Taha Arbaoui Karima Benatchba and Saman Amarasinghe. 2021. A Deep Learning Based Cost Model for Automatic Code Optimization. arxiv:2104.04955. arxiv:2104.04955"},{"key":"e_1_3_2_1_4_1","unstructured":"Mathieu Blondel and Vincent Roulet. 2024. The Elements of Differentiable Programming. arxiv:2403.14606. arxiv:2403.14606"},{"key":"e_1_3_2_1_5_1","unstructured":"Cadence. 2024. Tensilica XNNC v2 full course. https:\/\/www.cadence.com\/en_US\/home\/training\/all-courses\/86277.html"},{"key":"e_1_3_2_1_6_1","unstructured":"Cadence. 2024. Xtensa Neural Network Compiler User Guide [White Paper]."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Xuyi Cai Ying Wang and Lei Zhang. 2021. Optimus: towards optimal layer-fusion on deep learning processors. 67\u201379 pages.","DOI":"10.1145\/3461648.3463848"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3520142"},{"key":"e_1_3_2_1_9_1","unstructured":"Michael Canesche. 2024. Optimizing Machine Learning Models: A Droplet Search Approach to Efficient Kernel Scheduling. Universidade Federal de Minas Gerais. Belo Horizonte Brazil."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3650109"},{"key":"e_1_3_2_1_11_1","unstructured":"Michael Canesche Gaurav Verma and Fernando Magno Quintao Pereira. 2024. Explore as a Storm Exploit as a Raindrop: On the Benefit of Fine-Tuning Kernel Schedulers with Coordinate Descent. arxiv:2406.20037. arxiv:2406.20037"},{"key":"e_1_3_2_1_12_1","volume-title":"TVM: an automated end-to-end optimizing compiler for deep learning","author":"Chen Tianqi","year":"1931","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Meghan Cowan, Haichen Shen, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: an automated end-to-end optimizing compiler for deep learning. In OSDI. USENIX Association, USA. 579\u2013594. isbn:9781931971478"},{"key":"e_1_3_2_1_13_1","unstructured":"Scott Cyphers Arjun K Bansal Anahita Bhiwandiwalla Jayaram Bobba Matthew Brookhart Avijit Chakraborty Will Constable Christian Convey Leona Cook and Omar Kanawi. 2018. Intel nGraph: An intermediate representation compiler and executor for deep learning."},{"key":"e_1_3_2_1_14_1","unstructured":"Pooya Davoodi Chul Gwon Guangda Lai and Trevor Morris. 2019. Tensorrt inference with tensorflow."},{"key":"e_1_3_2_1_15_1","unstructured":"Pulin Desai. 2021. Vision and AI DSPs for Ultra-High-End and Always-On Applications. In Embedded Vision Summit. Cadence Design Systems San Jose US. 24 pages. https:\/\/www.edge-ai-vision.com\/wp-content\/uploads\/2021\/05\/ET067_Desai_Cadence.pdf"},{"key":"e_1_3_2_1_16_1","volume-title":"LoopTree: Enabling Systematic and Flexible Exploration of Fused-layer Dataflow Accelerators. Ph. D. Dissertation","author":"Gilbert Michael","unstructured":"Michael Gilbert. 2023. LoopTree: Enabling Systematic and Flexible Exploration of Fused-layer Dataflow Accelerators. Ph. D. Dissertation. Massachusetts Institute of Technology."},{"key":"e_1_3_2_1_17_1","unstructured":"Renato Golin Lorenzo Chelini Adam Siemieniuk Kavitha Madhu Niranjan Hasabnis Hans Pabst Evangelos Georganas and Alexander Heinecke. 2024. Towards a high-performance AI compiler with upstream MLIR. arxiv:2404.15204. arxiv:2404.15204"},{"key":"e_1_3_2_1_18_1","unstructured":"Google. 2024. XLA Overview. https:\/\/openxla.org\/xla"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3030548"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874254"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2212.05344"},{"key":"e_1_3_2_1_24_1","volume-title":"Iheb Nassim Aouadj, Nassim Tchoulak, Islem Kara Bernou, Hamza Benyamina, Fatima Benbouzid-Si Tayeb, Karima Benatchba, Hugh Leather, and Riyadh Baghdadi.","author":"Merouani Massinissa","year":"2024","unstructured":"Massinissa Merouani, Khaled Afif Boudaoud, Iheb Nassim Aouadj, Nassim Tchoulak, Islem Kara Bernou, Hamza Benyamina, Fatima Benbouzid-Si Tayeb, Karima Benatchba, Hugh Leather, and Riyadh Baghdadi. 2024. LOOPer: A Learned Automatic Code Optimizer For Polyhedral Compilers. arxiv:2403.11522. arxiv:2403.11522"},{"key":"e_1_3_2_1_25_1","unstructured":"Microsoft. 2024. ONNX Runtime. https:\/\/onnxruntime.ai\/"},{"key":"e_1_3_2_1_26_1","unstructured":"Orlando Moreira Merten Popp and Christian Schulz. 2017. Graph partitioning with acyclicity constraints."},{"key":"e_1_3_2_1_27_1","unstructured":"Apache MxNet. 2024. Apache MxNet: A flexible and efficient library for deep learning. https:\/\/mxnet.apache.org\/"},{"key":"e_1_3_2_1_28_1","volume-title":"From loop fusion to kernel fusion: a domain-specific approach to locality optimization","author":"Qiao Bo","unstructured":"Bo Qiao, Oliver Reiche, Frank Hannig, and J\u00fcrgen Teich. 2019. From loop fusion to kernel fusion: a domain-specific approach to locality optimization. In CGO. IEEE Press, Washington, DC, USA. 242\u2013253. isbn:9781728114361"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1908.11348"},{"key":"e_1_3_2_1_30_1","volume-title":"Glow: Graph lowering compiler techniques for neural networks.","author":"Rotem Nadav","year":"2018","unstructured":"Nadav Rotem, Jordan Fix, Saleem Abdulrasool, Garret Catron, Summer Deng, Roman Dzhabarov, Nick Gibson, James Hegeman, Meghan Lele, and Roman Levenstein. 2018. Glow: Graph lowering compiler techniques for neural networks."},{"key":"e_1_3_2_1_31_1","volume-title":"Welder: Scheduling Deep Learning Memory Access via Tile-graph","author":"Shi Yining","year":"2023","unstructured":"Yining Shi, Zhi Yang, Jilong Xue, Lingxiao Ma, Yuqing Xia, Ziming Miao, Yuxiao Guo, Fan Yang, and Lidong Zhou. 2023. Welder: Scheduling Deep Learning Memory Access via Tile-graph. In OSDI, Roxana Geambasu and Ed Nightingale (Eds.). USENIX Association, Berkeley, USA. 701\u2013718."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.21"},{"key":"e_1_3_2_1_33_1","unstructured":"Thomas J. Watson F. E. Allen and J. Cocke. 1971. A catalogue of optimizing transformations."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/AICAS57966.2023.10168659"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3635305"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071018"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507723"},{"key":"e_1_3_2_1_38_1","volume-title":"Densenet for dense flow","author":"Zhu Yi","unstructured":"Yi Zhu and Shawn Newsam. 2017. Densenet for dense flow. In ICIP. IEEE, New York, US. 790\u2013794."}],"event":{"name":"CC '25: 34th ACM SIGPLAN International Conference on Compiler Construction","location":"Las Vegas NV USA","acronym":"CC '25","sponsor":["SIGPLAN SIGPLAN Programming Languages"]},"container-title":["Proceedings of the 34th ACM SIGPLAN International Conference on Compiler Construction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708493.3712689","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3708493.3712689","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:09:54Z","timestamp":1750295394000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708493.3712689"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,25]]},"references-count":38,"alternative-id":["10.1145\/3708493.3712689","10.1145\/3708493"],"URL":"https:\/\/doi.org\/10.1145\/3708493.3712689","relation":{},"subject":[],"published":{"date-parts":[[2025,2,25]]},"assertion":[{"value":"2025-02-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}