{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T20:35:03Z","timestamp":1763584503014,"version":"3.45.0"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2024,9,13]],"date-time":"2024-09-13T00:00:00Z","timestamp":1726185600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,13]],"date-time":"2024-09-13T00:00:00Z","timestamp":1726185600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s11432-024-4071-6","type":"journal-article","created":{"date-parts":[[2024,9,15]],"date-time":"2024-09-15T21:02:11Z","timestamp":1726434131000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["TSCompiler: efficient compilation framework for dynamic-shape models"],"prefix":"10.1007","volume":"67","author":[{"given":"Xiang","family":"Luo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chen","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chenbo","family":"Geng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanzhi","family":"Yi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiahui","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Renwei","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhen","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gianpietro","family":"Consolaro","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fan","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tun","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ning","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Shang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,13]]},"reference":[{"key":"4071_CR1","unstructured":"Bai Y, Jones A, Ndousse K, et al. Training a helpful and harmless assistant with reinforcement learning from human feedback. 2022. ArXiv:2204.05862"},{"key":"4071_CR2","volume-title":"Proceedings of the 13th USENIX conference on Operating Systems Design and Implementation","author":"T Chen","year":"2018","unstructured":"Chen T, Moreau T, Jiang Z, et al. TVM: an automated end-to-end optimizing compiler for deep learning. In: Proceedings of the 13th USENIX conference on Operating Systems Design and Implementation, 2018"},{"key":"4071_CR3","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","author":"T Chen","year":"2018","unstructured":"Chen T, Zheng L, Yan E, et al. Learning to optimize tensor programs. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems, 2018"},{"key":"4071_CR4","volume-title":"Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation","author":"L Zheng","year":"2020","unstructured":"Zheng L, Jia C, Sun M, et al. Ansor: generating high-performance tensor programs for deep learning. In: Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation, 2020"},{"key":"4071_CR5","volume-title":"Proceedings of 16th USENIX Symposium on Operating Systems Design and Implementation","author":"H Zhu","year":"2022","unstructured":"Zhu H, Wu R, Diao Y, et al. ROLLER: fast and efficient tensor compilation for deep learning. In: Proceedings of 16th USENIX Symposium on Operating Systems Design and Implementation, 2022"},{"key":"4071_CR6","volume-title":"Proceedings of Machine Learning and Systems","author":"B Zheng","year":"2022","unstructured":"Zheng B, Jiang Z, Yu C, et al. DietCode: automatic optimization for dynamic tensor programs. In: Proceedings of Machine Learning and Systems, 2022"},{"key":"4071_CR7","doi-asserted-by":"publisher","first-page":"3178","DOI":"10.1109\/TC.2023.3288758","volume":"72","author":"P Mu","year":"2023","unstructured":"Mu P, Liu Y, Wang R, et al. HAOTuner: a hardware adaptive operator auto-tuner for dynamic shape tensor compilers. IEEE Trans Comput, 2023, 72: 3178\u20133190","journal-title":"IEEE Trans Comput"},{"key":"4071_CR8","volume-title":"Proceedings of Machine Learning and Systems","author":"H Shen","year":"2021","unstructured":"Shen H, Roesch J, Chen Z, et al. Nimble: efficiently compiling dynamic neural networks for model inference. In: Proceedings of Machine Learning and Systems 3, 2021"},{"key":"4071_CR9","volume-title":"Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"J Devlin","year":"2019","unstructured":"Devlin J, Chang M, Lee K, et al. BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2019"},{"key":"4071_CR10","volume-title":"Proceedings of International Conference on Learning Representations","author":"A Dosovitskiy","year":"2021","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, et al. An image is worth 16 \u00d7 16 words: transformers for image recognition at scale. In: Proceedings of International Conference on Learning Representations, 2021"},{"key":"4071_CR11","unstructured":"Lan Z, Chen M, Goodman S, et al. ALBERT: a lite BERT for self-supervised learning of language representations. 2019. ArXiv:1909.11942"},{"key":"4071_CR12","volume-title":"Proceedings of the 1st Workshop on Machine Learning and Systems","author":"K Zhu","year":"2021","unstructured":"Zhu K, Zhao W, Zheng Z, et al. DISC: a dynamic shape compiler for machine learning workloads. In: Proceedings of the 1st Workshop on Machine Learning and Systems, 2021"},{"key":"4071_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2743016","volume":"37","author":"T Grosser","year":"2015","unstructured":"Grosser T, Verdoolaege S, Cohen A. Polyhedral AST generation is more than scanning polyhedra. ACM Trans Program Lang Syst, 2015, 37: 1\u201350","journal-title":"ACM Trans Program Lang Syst"},{"key":"4071_CR14","volume-title":"Proceedings of the 12th International Workshop on Polyhedral Compilation Techniques (associated with HIPEAC 2020)","author":"R Baghdadi","year":"2020","unstructured":"Baghdadi R, Cohen A. Scalable polyhedral compilation, syntax vs. semantics: 1\u20130 in the first round. In: Proceedings of the 12th International Workshop on Polyhedral Compilation Techniques (associated with HIPEAC 2020), 2020"},{"key":"4071_CR15","volume-title":"Proceedings of \/ACM International Symposium on Code Generation and Optimization (CGO)","author":"C Bastoul","year":"2022","unstructured":"Bastoul C, Zhang Z, Razanajato H, et al. Optimizing GPU deep learning operators with polyhedral scheduling constraint injection. In: Proceedings of \/ACM International Symposium on Code Generation and Optimization (CGO), 2022"},{"key":"4071_CR16","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"D Eriksson","year":"2019","unstructured":"Eriksson D, Pearce M, Gardner J, et al. Scalable global optimization via local Bayesian optimization. In: Proceedings of Advances in Neural Information Processing Systems, 2019"},{"key":"4071_CR17","volume-title":"Proceedings of the 53rd Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO)","author":"J Zhao","year":"2020","unstructured":"Zhao J, Di P. Optimizing the memory hierarchy by compositing automatic transformations on computations and data. In: Proceedings of the 53rd Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), 2020"},{"key":"4071_CR18","volume-title":"Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","author":"S Feng","year":"2023","unstructured":"Feng S, Hou B, Jin H, et al. TensorIR: an abstraction for automatic tensorized program optimization. In: Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, 2023"},{"key":"4071_CR19","volume-title":"Proceedings of Machine Learning and Systems 4 (MLSys 2022)","author":"J Zhao","year":"2022","unstructured":"Zhao J, Gao X, Xia R, et al. Apollo: automatic partition-based operator fusion through layer by layer optimization. In: Proceedings of Machine Learning and Systems 4 (MLSys 2022), 2022"},{"key":"4071_CR20","volume-title":"Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation","author":"W Niu","year":"2021","unstructured":"Niu W, Guan J, Wang Y, et al. DNNFusion: accelerating deep neural networks execution with advanced operator fusion. In: Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation, 2021"},{"key":"4071_CR21","volume-title":"Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","author":"Z Zheng","year":"2022","unstructured":"Zheng Z, Yang X, Zhao P, et al. AStitch: enabling a new multi-dimensional optimization space for memory-intensive ML training and inference on modern SIMT architectures. In: Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, 2022"},{"key":"4071_CR22","volume-title":"Proceedings of the 20th IEEE\/ACM International Symposium on Code Generation and Optimization","author":"A Li","year":"2022","unstructured":"Li A, Zheng B, Pekhimenko G, et al. Automatic horizontal fusion for GPU kernels. In: Proceedings of the 20th IEEE\/ACM International Symposium on Code Generation and Optimization, 2022"},{"key":"4071_CR23","volume-title":"Proceedings of the International Conference on Parallel Architectures and Compilation Techniques","author":"J Zhao","year":"2022","unstructured":"Zhao J, Bastoul C, Yi Y, et al. Parallelizing neural network models effectively on GPU by implementing reductions atomically. In: Proceedings of the International Conference on Parallel Architectures and Compilation Techniques, 2022"},{"key":"4071_CR24","unstructured":"Vasilache N, Zinenko O, Theodoridis T, et al. Tensor comprehensions: framework-agnostic high-performance machine learning abstractions. 2018. ArXiv:1802.04730"},{"key":"4071_CR25","unstructured":"Chetlur S, Woolley C, Vandermersch S. cuDNN: efficient primitives for deep learning. 2014. ArXiv:1410.0759"},{"key":"4071_CR26","volume-title":"Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","author":"K Hegde","year":"2021","unstructured":"Hegde K, Tsai P, Huang S, et al. Mind mappings: enabling efficient algorithm-accelerator mapping space search. In: Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, 2021"},{"key":"4071_CR27","series-title":"CW Report","volume-title":"Scheduling for PPCG","author":"S Verdoolaege","year":"2017","unstructured":"Verdoolaege S, Janssens G. Scheduling for PPCG. CW Report, Department of Computer Science, 2017"},{"key":"4071_CR28","volume-title":"Proceedings of International Journal of Parallel Programming","author":"P Feautrier","year":"1992","unstructured":"Feautrier P. Some efficient solutions to the affine scheduling problem. Part II. multidimensional time. In: Proceedings of International Journal of Parallel Programming, 1992"},{"key":"4071_CR29","volume-title":"Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation","author":"J Zhao","year":"2021","unstructured":"Zhao J, Li B, Nie W, et al. AKG: automatic kernel generation for neural processing units using polyhedral transformations. In: Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation, 2021"},{"key":"4071_CR30","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"C Szegedy","year":"2015","unstructured":"Szegedy C, Liu W, Jia Y, et al. Going deeper with convolutions. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2015"},{"key":"4071_CR31","volume-title":"Proceedings of International Congress on Mathematical Software","author":"S Verdoolaege","year":"2010","unstructured":"Verdoolaege S. isl: an integer set library for the polyhedral model. In: Proceedings of International Congress on Mathematical Software, 2010"},{"key":"4071_CR32","doi-asserted-by":"publisher","first-page":"246","DOI":"10.1109\/TPDS.2022.3217824","volume":"34","author":"W Sun","year":"2023","unstructured":"Sun W, Li A, Geng T, et al. Dissecting tensor cores via microbenchmarks: latency, throughput and numeric behaviors. IEEE Trans Parallel Distrib Syst, 2023, 34: 246\u2013261","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"4071_CR33","unstructured":"Vasilache N, Zinenko O, Bik A, et al. Composable and modular code generation in MLIR: a structured and retargetable approach to tensor compiler construction. 2022. ArXiv:2202.03293"},{"key":"4071_CR34","volume-title":"Proceedings of the IEEE\/ACM International Symposium on Code Generation and Optimization","author":"C Lattner","year":"2021","unstructured":"Lattner C, Amini M, Bondhugula U, et al. MLIR: scaling compiler infrastructure for domain specific computation. In: Proceedings of the IEEE\/ACM International Symposium on Code Generation and Optimization, 2021"},{"key":"4071_CR35","volume-title":"Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","author":"M Osama","year":"2023","unstructured":"Osama M, Merrill D, Cecka C, et al. Stream-K: work-centric parallel decomposition for dense matrix-matrix multiplication on the GPU. In: Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming, 2023"},{"key":"4071_CR36","volume-title":"Proceedings of the 44th Annual International Symposium on Computer Architecture","author":"N Jouppi","year":"2017","unstructured":"Jouppi N, Young C, Patil N, et al. In-datacenter performance analysis of a tensor processing unit. In: Proceedings of the 44th Annual International Symposium on Computer Architecture, 2017"},{"key":"4071_CR37","unstructured":"Collins A, Grover V. Axon: a language for dynamic shapes in deep learning graphs. 2022. ArXiv:2210.02374"},{"key":"4071_CR38","unstructured":"Lai R L, Shao J R, Feng S Y, et al. Relax: composable abstractions for end-to-end dynamic machine learning. 2023. ArXiv:2311.02103"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-024-4071-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-024-4071-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-024-4071-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T20:24:25Z","timestamp":1763583865000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-024-4071-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,13]]},"references-count":38,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["4071"],"URL":"https:\/\/doi.org\/10.1007\/s11432-024-4071-6","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"type":"print","value":"1674-733X"},{"type":"electronic","value":"1869-1919"}],"subject":[],"published":{"date-parts":[[2024,9,13]]},"assertion":[{"value":"29 February 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 May 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 June 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 September 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"200403"}}