{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T08:47:09Z","timestamp":1771922829552,"version":"3.50.1"},"reference-count":71,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T00:00:00Z","timestamp":1769817600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T00:00:00Z","timestamp":1769817600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,1,31]]},"DOI":"10.1109\/cgo68049.2026.11395192","type":"proceedings-article","created":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T20:46:32Z","timestamp":1771879592000},"page":"150-163","source":"Crossref","is-referenced-by-count":0,"title":["Ember: A Compiler for Embedding Operations on Decoupled Access-Execute Architectures"],"prefix":"10.1109","author":[{"given":"Marco","family":"Siracusa","sequence":"first","affiliation":[{"name":"Barcelona Supercomputing Center,Barcelona,Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Olivia","family":"Hsu","sequence":"additional","affiliation":[{"name":"Stanford University,Stanford,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Victor","family":"Soria-Pardos","sequence":"additional","affiliation":[{"name":"Barcelona Supercomputing Center,Barcelona,Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joshua","family":"Randall","sequence":"additional","affiliation":[{"name":"Arm,Austin,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Arnaud","family":"Grasset","sequence":"additional","affiliation":[{"name":"Arm,Biot,France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eric","family":"Biscondi","sequence":"additional","affiliation":[{"name":"Arm,Biot,France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Doug","family":"Joseph","sequence":"additional","affiliation":[{"name":"Arm,Austin,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Randy","family":"Allen","sequence":"additional","affiliation":[{"name":"Barcelona Supercomputing Center,Barcelona,Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fredrik","family":"Kjolstad","sequence":"additional","affiliation":[{"name":"Stanford University,Stanford,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Miquel Moret\u00f3","family":"Planas","sequence":"additional","affiliation":[{"name":"Universitat Polit&#x00E8;cnica de Catalunya,Barcelona,Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adri\u00e0","family":"Armejach","sequence":"additional","affiliation":[{"name":"Universitat Polit&#x00E8;cnica de Catalunya,Barcelona,Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1532","article-title":"GloVe: Global vectors for word representation","volume-title":"Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)","author":"Pennington"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/isca45697.2020.00070"},{"key":"ref3","first-page":"17 283","article-title":"Big bird: Transformers for longer sequences","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Zaheer","year":"2020"},{"key":"ref4","article-title":"Open graph benchmark: datasets for machine learning on graphs","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Hu"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/hpca47549.2020.00047"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/1067649.801719"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614284"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00087"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527400"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3460368"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589348"},{"key":"ref13","volume-title":"PyTorch: an imperative style, high-performance deep learning library","author":"Paszke","year":"2019"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3488423.3519317"},{"key":"ref15","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00034"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1038\/scientificamerican0501-34"},{"key":"ref18","article-title":"Nvidia h100 specs","year":"2024"},{"key":"ref19","article-title":"Nvidia hopper architecture in-depth","year":"2022"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3630007"},{"key":"ref21","article-title":"Deep learning recommendation model for personalization and recommendation systems","author":"Naumov","year":"2019"},{"key":"ref22","first-page":"104941","article-title":"Spchar: Characterizing the sparse puzzle via decision trees","volume-title":"Journal of Parallel and Distributed Computing","volume":"192","author":"Sgherzi","year":"2024"},{"key":"ref23","article-title":"The gem5 Simulator: Version 20.0+","author":"Lowe-Power","year":"2020"},{"key":"ref24","first-page":"469","article-title":"Mcpat: An integrated power, area, and timing modeling framework for multicore and manycore architectures","volume-title":"2009 42nd Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO)","author":"Li"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611971538"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3133901"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582051"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.6570"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586203"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM51124.2021.00017"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3456669.3456671"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2021.3111761"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/cgo51591.2021.9370308"},{"key":"ref34","article-title":"Torch-MLIR"},{"key":"ref35","article-title":"Mlir sparsifier","year":"2021"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661185"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3656426"},{"key":"ref38","volume-title":"Optimizing compilers for modern architectures: a dependence-based approach","author":"Kennedy","year":"2001"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.35"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3544559"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/996841.996853"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-71229-9_1"},{"key":"ref43","article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","author":"Abadi","year":"2015"},{"key":"ref44","article-title":"Deep graph library: A graph-centric, highly-performant package for graph neural networks","author":"Jia","year":"2020"},{"key":"ref45","article-title":"JAX: composable transformations of Python+NumPy programs","author":"Bradbury","year":"2018"},{"key":"ref46","article-title":"Fast graph representation learning with PyTorch Geometric","volume-title":"ICLR Workshop on Representation Learning on Graphs and Manifolds","author":"Fey"},{"key":"ref47","article-title":"Glow: Graph lowering compiler techniques for neural networks","author":"Rotem","year":"2018"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2004.1281665"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3579990.3580020"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3591236"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3591268"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/3428226"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582047"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/3649816"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2025.3604724"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/3696443.3708918"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/VLSITechnologyandCir46783.2024.10631383"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00064"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/3696443.3708952"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830800"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2020.3012318"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2011.2110592"},{"key":"ref63","first-page":"736","article-title":"Stream-based memory access specialization for general purpose processors","volume-title":"2019 ACM\/IEEE 46th Annual International Symposium on Computer Architecture (ISCA)","author":"Wang"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/2544137.2544161"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3461472"},{"key":"ref66","article-title":"Ember artifact","author":"Siracusa","year":"2025"},{"key":"ref67","article-title":"SNAP Datasets: Stanford large network dataset collection","author":"Leskovec","year":"2014"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/2049662.2049663"},{"key":"ref69","article-title":"Nvidia nsight systems","year":"2024"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3533727"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1137\/18m1210691"}],"event":{"name":"2026 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)","location":"Sydney, Australia","start":{"date-parts":[[2026,1,31]]},"end":{"date-parts":[[2026,2,4]]}},"container-title":["2026 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11395173\/11394837\/11395192.pdf?arnumber=11395192","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T07:42:02Z","timestamp":1771918922000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11395192\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,31]]},"references-count":71,"URL":"https:\/\/doi.org\/10.1109\/cgo68049.2026.11395192","relation":{},"subject":[],"published":{"date-parts":[[2026,1,31]]}}}