{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T08:03:25Z","timestamp":1764403405384,"version":"3.44.0"},"reference-count":35,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R&amp;D Program of China","doi-asserted-by":"publisher","award":["2021ZD0110203"],"award-info":[{"award-number":["2021ZD0110203"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"NSFC","doi-asserted-by":"publisher","award":["92373110","U22A2028","62341411","U23B2020","62322201","62072018"],"award-info":[{"award-number":["92373110","U22A2028","62341411","U23B2020","62322201","62072018"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tc.2024.3441820","type":"journal-article","created":{"date-parts":[[2024,8,22]],"date-time":"2024-08-22T14:16:20Z","timestamp":1724336180000},"page":"3251-3262","source":"Crossref","is-referenced-by-count":1,"title":["Sifter: An Efficient Operator Auto-Tuner With Speculative Design Space Exploration for Deep Learning Compiler"],"prefix":"10.1109","volume":"74","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-1379-0576","authenticated-orcid":false,"given":"Qianhe","family":"Zhao","sequence":"first","affiliation":[{"name":"Hangzhou Innovation Institute, Beihang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2741-6033","authenticated-orcid":false,"given":"Rui","family":"Wang","sequence":"additional","affiliation":[{"name":"Hangzhou Innovation Institute, Beihang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1829-2817","authenticated-orcid":false,"given":"Yi","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1101-7927","authenticated-orcid":false,"given":"Hailong","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7186-0556","authenticated-orcid":false,"given":"Zhongzhi","family":"Luan","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5382-1473","authenticated-orcid":false,"given":"Depei","family":"Qian","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Beihang University, Beijing, China"}]}],"member":"263","reference":[{"article-title":"LLaMA: Open and efficient Foundation Language models","year":"2023","author":"Touvron","key":"ref1"},{"volume-title":"Improving language understanding by generative pre-training","year":"2018","author":"Radford","key":"ref2"},{"issue":"8","key":"ref3","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"ref4","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"year":"2023","key":"ref5","article-title":"cuBLAS: Basic linear algebra on NVIDIA GPUs"},{"article-title":"cuDNN: Efficient primitives for deep learning","year":"2014","author":"Chetlur","key":"ref6"},{"year":"2023","key":"ref7","article-title":"CUTLASS: Cuda templates for linear algebra subroutines"},{"year":"2023","key":"ref8","article-title":"Accelerate fast math with intel\u00ae oneAPI math kernel library"},{"year":"2023","key":"ref9","article-title":"oneAPI deep neural network library (oneDNN)"},{"key":"ref10","first-page":"578","article-title":"{TVM}: An automated {End-to-End} optimizing compiler for deep learning","volume-title":"Proc. 13th USENIX Symp. Operating Syst. Design Implementation (OSDI)","author":"Chen","year":"2018"},{"article-title":"Intel nGraph: An intermediate representation, compiler, and executor for deep learning","year":"2018","author":"Cyphers","key":"ref11"},{"article-title":"Tensor comprehensions: Framework-agnostic high-performance machine learning abstractions","year":"2018","author":"Vasilache","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"ref14","first-page":"3393","article-title":"Learning to optimize tensor programs","volume":"31","author":"Chen","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref15","first-page":"863","article-title":"Ansor: Generating {High-Performance} tensor programs for deep learning","volume-title":"Proc. 14th USENIX Symp. Operating Syst. Des. Implementation (OSDI)","author":"Zheng","year":"2020"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582061"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527440"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454106"},{"article-title":"Chameleon: Adaptive code optimization for expedited deep neural network compilation","year":"2020","author":"Ahn","key":"ref19"},{"key":"ref20","first-page":"233","article-title":"{ROLLER}: Fast and efficient tensor compilation for deep learning","volume-title":"Proc. 16th USENIX Symp. Operating Syst. Des. Implementation (OSDI)","author":"Zhu","year":"2022"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"article-title":"Very deep convolutional networks for large-scale image recognition","year":"2014","author":"Simonyan","key":"ref23"},{"year":"2023","key":"ref24","article-title":"NVIDIA tensor cores"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/2499370.2462176"},{"key":"ref26","article-title":"TenSet: A large-scale program performance dataset for learned tensor compilers","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst. Datasets Benchmarks Track (Round 1)","author":"Zheng","year":"2021"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575737"},{"key":"ref28","article-title":"BALTO: Fast tensor program optimization with diversity-based active learning","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Bi","year":"2022"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2023.3279233"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.11919\/j.issn.1002\u20130829.215044"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022643204877"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993309"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1201\/9781315139470"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.29172\/7c2a6982-6d72-4cd8-bba6-2fccb06a7011"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ESLsyn.2014.6850383"}],"container-title":["IEEE Transactions on Computers"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/12\/11164542\/10643602.pdf?arnumber=10643602","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T17:33:34Z","timestamp":1758044014000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10643602\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":35,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tc.2024.3441820","relation":{},"ISSN":["0018-9340","1557-9956","2326-3814"],"issn-type":[{"type":"print","value":"0018-9340"},{"type":"electronic","value":"1557-9956"},{"type":"electronic","value":"2326-3814"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}