{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:09:03Z","timestamp":1757617743856,"version":"3.44.0"},"publisher-location":"Singapore","reference-count":24,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819628292"},{"type":"electronic","value":"9789819628308"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-2830-8_5","type":"book-chapter","created":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T19:20:59Z","timestamp":1743362459000},"page":"53-64","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DTuner: A\u00a0Construction-Based Optimization Method for\u00a0Dynamic Tensor Operators Accelerating"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-6612-8000","authenticated-orcid":false,"given":"Wenxin","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8360-7718","authenticated-orcid":false,"given":"Boyu","family":"Diao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2543-0655","authenticated-orcid":false,"given":"Hangda","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1867-503X","authenticated-orcid":false,"given":"RuiSheng","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6647-0986","authenticated-orcid":false,"given":"Yongjun","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,3,29]]},"reference":[{"key":"5_CR1","unstructured":"Google cloud TPU. https:\/\/cloud.google.com\/tpu. Information on Google\u2019s Tensor Processing Units. Accessed 24 Aug 2024"},{"key":"5_CR2","unstructured":"Abadi, M., et al.: $$\\{$$TensorFlow$$\\}$$: a system for $$\\{$$Large-Scale$$\\}$$ machine learning. In: 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 2016), pp. 265\u2013283 (2016)"},{"key":"5_CR3","unstructured":"Chen, T., et\u00a0al.: $$\\{$$TVM$$\\}$$: An automated $$\\{$$End-to-End$$\\}$$ optimizing compiler for deep learning. In: 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 2018), pp. 578\u2013594 (2018)"},{"key":"5_CR4","unstructured":"Chen, T., et al.: Learning to optimize tensor programs. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"5_CR5","unstructured":"Devlin, J., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"5_CR6","unstructured":"Lai, R., et al.: Relax: composable abstractions for end-to-end dynamic machine learning (2023). https:\/\/arxiv.org\/abs\/2311.02103"},{"key":"5_CR7","doi-asserted-by":"crossref","unstructured":"Lattner, C., et al.: MLIR: scaling compiler infrastructure for domain specific computation. In: 2021 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO), pp. 2\u201314. IEEE (2021)","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Liu, H., Diao, B., Chen, W., Xu, Y.: A resource-aware workload scheduling method for unbalanced GEMMs on GPUs. Comput. J. bxae110 (2024)","DOI":"10.1093\/comjnl\/bxae110"},{"key":"5_CR9","doi-asserted-by":"publisher","first-page":"428","DOI":"10.1016\/j.sysarc.2019.01.011","volume":"97","author":"S Mittal","year":"2019","unstructured":"Mittal, S.: A survey on optimized implementation of deep learning models on the NVIDIA Jetson platform. J. Syst. Architect. 97, 428\u2013442 (2019)","journal-title":"J. Syst. Architect."},{"key":"5_CR10","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I.: Language models are unsupervised multitask learners (2019)"},{"key":"5_CR11","unstructured":"Radford, A.: Improving language understanding by generative pre-training (2018)"},{"key":"5_CR12","unstructured":"Rotem, N., et\u00a0al.: Glow: graph lowering compiler techniques for neural networks. arXiv preprint arXiv:1805.00907 (2018)"},{"key":"5_CR13","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1134\/S0361768820030068","volume":"46","author":"VV Sanzharov","year":"2020","unstructured":"Sanzharov, V.V., Frolov, V.A., Galaktionov, V.A.: Survey of NVIDIA RTX technology. Program. Comput. Softw. 46, 297\u2013304 (2020)","journal-title":"Program. Comput. Softw."},{"key":"5_CR14","unstructured":"Shen, H., Chen, T., Liu, Y., Guestrin, C.: Nimble: a compiler and runtime for efficient dynamic neural networks. In: Proceedings of Machine Learning and Systems (MLSys), pp. 1\u201313. MLSys Organization, Virtual (2021)"},{"key":"5_CR15","unstructured":"TensorFlow Development Team: XLA: Tensorflow, compiled. https:\/\/www.tensorflow.org\/xla"},{"key":"5_CR16","doi-asserted-by":"crossref","unstructured":"Tillet, P.: Triton: an intermediate language and compiler for tiled neural network computations. In: Proceedings of the 3rd ACM SIGPLAN International Workshop on Machine Learning and Programming Languages, pp. 1\u201312 (2019)","DOI":"10.1145\/3315508.3329973"},{"key":"5_CR17","unstructured":"Vaswani, A.: Attention is all you need. In: Advances in Neural Information Processing Systems (2017)"},{"key":"5_CR18","first-page":"204","volume":"4","author":"J Xing","year":"2022","unstructured":"Xing, J., Wang, L., Zhang, S., Chen, J., Chen, A., Zhu, Y.: Bolt: bridging the gap between auto-tuners and hardware-native performance. Proc. Mach. Learn. Syst. 4, 204\u2013216 (2022)","journal-title":"Proc. Mach. Learn. Syst."},{"key":"5_CR19","first-page":"848","volume":"4","author":"B Zheng","year":"2022","unstructured":"Zheng, B., et al.: DietCode: automatic optimization for dynamic tensor programs. Proc. Mach. Learn. Syst. 4, 848\u2013863 (2022)","journal-title":"Proc. Mach. Learn. Syst."},{"key":"5_CR20","unstructured":"Zheng, L., Yan, E., Chen, T., Moreau, T., Zhang, Z.: Ansor: Generating high-performance tensor programs for deep learning. In: 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI), pp. 863\u2013879. USENIX Association, Virtual (2020)"},{"key":"5_CR21","unstructured":"Zheng, L., et\u00a0al.: Ansor: generating $$\\{$$High-Performance$$\\}$$ tensor programs for deep learning. In: 14th USENIX symposium on operating systems design and implementation (OSDI 2020), pp. 863\u2013879 (2020)"},{"issue":"3","key":"5_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3617327","volume":"1","author":"Z Zheng","year":"2023","unstructured":"Zheng, Z., et al.: BladeDISC: optimizing dynamic shape machine learning workloads via compiler approach. Proc. ACM Manage. Data 1(3), 1\u201329 (2023)","journal-title":"Proc. ACM Manage. Data"},{"key":"5_CR23","unstructured":"Zhu, H., et\u00a0al.: ROLLER: fast and efficient tensor compilation for deep learning. In: 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI), pp. 599\u2013616 (2022)"},{"key":"5_CR24","unstructured":"Zhu, H., et\u00a0al.: $$\\{$$ROLLER$$\\}$$: fast and efficient tensor compilation for deep learning. In: 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 2022), pp. 233\u2013248 (2022)"}],"container-title":["Lecture Notes in Computer Science","Network and Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-2830-8_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T08:53:20Z","timestamp":1757148800000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-2830-8_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819628292","9789819628308"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-2830-8_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"29 March 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NPC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"IFIP International Conference on Network and Parallel Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Haikou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"npc2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}