{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,10]],"date-time":"2025-05-10T06:05:27Z","timestamp":1746857127927,"version":"3.37.3"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,17]],"date-time":"2023-05-17T00:00:00Z","timestamp":1684281600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,17]],"date-time":"2023-05-17T00:00:00Z","timestamp":1684281600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,17]]},"DOI":"10.1109\/infocom53939.2023.10228858","type":"proceedings-article","created":{"date-parts":[[2023,8,29]],"date-time":"2023-08-29T17:40:43Z","timestamp":1693330843000},"page":"1-10","source":"Crossref","is-referenced-by-count":1,"title":["AGO: Boosting Mobile AI Inference Performance by Removing Constraints on Graph Optimization"],"prefix":"10.1109","author":[{"given":"Zhiying","family":"Xu","sequence":"first","affiliation":[{"name":"Nanjing University,State Key Laboratory for Novel Software Technology,China"}]},{"given":"Hongding","family":"Peng","sequence":"additional","affiliation":[{"name":"Nanjing University,State Key Laboratory for Novel Software Technology,China"}]},{"given":"Wei","family":"Wang","sequence":"additional","affiliation":[{"name":"Nanjing University,State Key Laboratory for Novel Software Technology,China"}]}],"member":"263","reference":[{"article-title":"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and <0.5mb model size","year":"2016","author":"iandola","key":"ref13"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507723"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00293"},{"article-title":"Bolt: Bridging the gap between auto-tuners and hardware-native performance","year":"2021","author":"xing","key":"ref34"},{"key":"ref15","article-title":"Well-read students learn better: The impact of student initialization on knowledge distillation","author":"turc","year":"2019","journal-title":"CoRR"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/1807167.1807184"},{"key":"ref14","first-page":"116","article-title":"Shufflenet v2: Practical guidelines for efficient cnn architecture design","author":"ma","year":"2018","journal-title":"Proceedings of ECCV"},{"year":"0","key":"ref36","article-title":"Nvidia\/cutlass: Cuda templates for linear algebra subroutines"},{"article-title":"Alt: Breaking the wall between graph and operator level optimizations for deep learning compilation","year":"2022","author":"xu","key":"ref31"},{"key":"ref30","first-page":"37","article-title":"PET: Optimizing tensor programs with partially equivalent transformations and automated corrections","author":"wang","year":"2021","journal-title":"Proceedings of USENIX OSDI"},{"article-title":"MobileNets: Efficient convolutional neural networks for mobile vision applications","year":"2017","author":"howard","key":"ref11"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454083"},{"key":"ref10","first-page":"1233","article-title":"Akg: automatic kernel generation for neural processing units using polyhedral transformations","author":"jie","year":"2021","journal-title":"Proceedings of ACM PLDI"},{"key":"ref32","first-page":"233","article-title":"{ROLLER}: Fast and efficient tensor compilation for deep learning","author":"zhu","year":"2022","journal-title":"Proceedings of USENIX OSDI"},{"key":"ref2","article-title":"Xla: Tensorflow, compiled","author":"leary","year":"2017","journal-title":"TensorFlow Dev Summit"},{"key":"ref1","article-title":"MNN: A universal and efficient inference engine","author":"jiang","year":"2020","journal-title":"Proceedings of MLSys"},{"key":"ref17","article-title":"Mobilevit: Light-weight, general-purpose, and mobile-friendly vision transformer","author":"mehta","year":"2022","journal-title":"Proceedings of ICLR"},{"article-title":"Ordering chaos: Memory-aware scheduling of irregularly wired neural networks for edge devices","year":"2020","author":"ahn","key":"ref39"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.insights-1.18"},{"article-title":"Ios: Inter-operator scheduler for cnn acceleration","year":"2021","author":"ding","key":"ref38"},{"key":"ref19","first-page":"265","article-title":"Tensorflow: A system for large-scale machine learning","author":"abadi","year":"2016","journal-title":"Proceedings of USENIX OSDI"},{"key":"ref18","first-page":"8026","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"paszke","year":"2019","journal-title":"Proceedings of NeurIPS"},{"article-title":"Fusionstitching: boosting memory intensive computations for deep learning workloads","year":"2020","author":"zheng","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446759"},{"article-title":"Tuna: A static analysis approach to optimizing deep neural networks","year":"2021","author":"wang","key":"ref25"},{"year":"2021","key":"ref20","article-title":"Xnnpack: Highly optimized library of floating-point neural network inference operators for arm, webassembly, and x86 platforms"},{"key":"ref42","first-page":"267","article-title":"Unity: Accelerating DNN training through joint optimization of algebraic transformations and parallelization","author":"unger","year":"2022","journal-title":"Proceedings of USENIX OSDI"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419215"},{"article-title":"Tensor comprehensions: Framework-agnostic high-performance machine learning abstractions","year":"2018","author":"vasilache","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref43","first-page":"249","article-title":"Walle: An End-to-End, General-Purpose, and Large-Scale production system for Device-Cloud collaborative machine learning","author":"lv","year":"2022","journal-title":"Proceedings of USENIX OSDI"},{"key":"ref28","article-title":"A deep learning based cost model for automatic code optimization","volume":"3","author":"baghdadi","year":"2021","journal-title":"Proceedings of MLSys"},{"key":"ref27","article-title":"Value learning for throughput optimization of deep learning workloads","volume":"3","author":"steiner","year":"2021","journal-title":"Proceedings of MLSys"},{"key":"ref29","article-title":"Equality saturation for tensor graph superoptimization","volume":"3","author":"yang","year":"2021","journal-title":"Proceedings of MLSys"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378508"},{"key":"ref7","first-page":"3389","article-title":"Learning to optimize tensor programs","author":"chen","year":"2018","journal-title":"Proceedings of NeurIPS"},{"key":"ref9","first-page":"863","article-title":"Ansor: generating high-performance tensor programs for deep learning","author":"zheng","year":"2020","journal-title":"Proceedings of USENIX OSDI"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661197"},{"key":"ref3","first-page":"578","article-title":"TVM: An automated end-to-end optimizing compiler for deep learning","author":"chen","year":"2018","journal-title":"Proc of USENIX OSDI"},{"key":"ref6","article-title":"Apollo: Automatic partition-based operator fusion through layer by layer optimization","author":"zhao","year":"2022","journal-title":"Proceedings of MLSys"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3211346.3211348"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419194"}],"event":{"name":"IEEE INFOCOM 2023 - IEEE Conference on Computer Communications","start":{"date-parts":[[2023,5,17]]},"location":"New York City, NY, USA","end":{"date-parts":[[2023,5,20]]}},"container-title":["IEEE INFOCOM 2023 - IEEE Conference on Computer Communications"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10228851\/10228852\/10228858.pdf?arnumber=10228858","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,18]],"date-time":"2023-09-18T17:44:44Z","timestamp":1695059084000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10228858\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,17]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/infocom53939.2023.10228858","relation":{},"subject":[],"published":{"date-parts":[[2023,5,17]]}}}