{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T19:39:28Z","timestamp":1771616368910,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,29]],"date-time":"2022-08-29T00:00:00Z","timestamp":1661731200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,29]]},"DOI":"10.1145\/3545008.3545051","type":"proceedings-article","created":{"date-parts":[[2023,1,15]],"date-time":"2023-01-15T01:04:08Z","timestamp":1673744648000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["NNLQP: A Multi-Platform Neural Network Latency Query and Prediction System with An Evolving Database"],"prefix":"10.1145","author":[{"given":"Liang","family":"Liu","sequence":"first","affiliation":[{"name":"SenseTime, China"}]},{"given":"Mingzhu","family":"Shen","sequence":"additional","affiliation":[{"name":"SenseTime, China"}]},{"given":"Ruihao","family":"Gong","sequence":"additional","affiliation":[{"name":"SenseTime, China and Beihang University, China"}]},{"given":"Fengwei","family":"Yu","sequence":"additional","affiliation":[{"name":"SenseTime, China"}]},{"given":"Hailong","family":"Yang","sequence":"additional","affiliation":[{"name":"Beihang University, China"}]}],"member":"320","published-online":{"date-parts":[[2023,1,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"ONNX: Open Neural Network Exchange. https:\/\/github.com\/onnx\/onnx.","author":"Bai Junjie","year":"2019","unstructured":"Junjie Bai, Fang Lu, Ke Zhang, 2019. ONNX: Open Neural Network Exchange. https:\/\/github.com\/onnx\/onnx."},{"key":"e_1_3_2_1_2_1","unstructured":"Han Cai Chuang Gan Tianzhe Wang Zhekai Zhang and Song Han. 2020. Once-for-All: Train One Network and Specialize it for Efficient Deployment. arxiv:1908.09791\u00a0[cs.LG]"},{"key":"e_1_3_2_1_3_1","volume-title":"Proxylessnas: Direct neural architecture search on target task and hardware. arXiv preprint arXiv:1812.00332(2018).","author":"Cai Han","year":"2018","unstructured":"Han Cai, Ligeng Zhu, and Song Han. 2018. Proxylessnas: Direct neural architecture search on target task and hardware. arXiv preprint arXiv:1812.00332(2018)."},{"key":"e_1_3_2_1_4_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). 578\u2013594."},{"key":"e_1_3_2_1_5_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arxiv:1810.04805\u00a0[cs.CL]","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arxiv:1810.04805\u00a0[cs.CL]"},{"key":"e_1_3_2_1_6_1","unstructured":"TensorRT Documentation. 2021. Optimizing for Tensor Cores. https:\/\/docs.nvidia.com\/deeplearning\/tensorrt\/developer-guide\/index.html#optimize-tensor-cores."},{"key":"e_1_3_2_1_7_1","volume-title":"Brp-nas: Prediction-based nas using gcns. arXiv preprint arXiv:2007.08668(2020).","author":"Dudziak \u0141ukasz","year":"2020","unstructured":"\u0141ukasz Dudziak, Thomas Chau, Mohamed\u00a0S Abdelfattah, Royson Lee, Hyeji Kim, and Nicholas\u00a0D Lane. 2020. Brp-nas: Prediction-based nas using gcns. arXiv preprint arXiv:2007.08668(2020)."},{"key":"e_1_3_2_1_8_1","volume-title":"Differentiable Soft Quantization: Bridging Full-Precision and Low-Bit Neural Networks. In The IEEE International Conference on Computer Vision (ICCV).","author":"Gong Ruihao","year":"2019","unstructured":"Ruihao Gong, Xianglong Liu, Shenghu Jiang, Tianxiang Li, Peng Hu, Jiazhen Lin, Fengwei Yu, and Junjie Yan. 2019. Differentiable Soft Quantization: Bridging Full-Precision and Low-Bit Neural Networks. In The IEEE International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_2_1_9_1","volume-title":"O\u2019Reilly Media","author":"Grinberg Miguel","unstructured":"Miguel Grinberg. 2018. Flask web development: developing web applications with python. \u201d O\u2019Reilly Media, Inc.\u201d."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems. 1025\u20131035","author":"Hamilton L","year":"2017","unstructured":"William\u00a0L Hamilton, Rex Ying, and Jure Leskovec. 2017. Inductive representation learning on large graphs. In Proceedings of the 31st International Conference on Neural Information Processing Systems. 1025\u20131035."},{"key":"e_1_3_2_1_12_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2015. Deep Residual Learning for Image Recognition. arxiv:1512.03385\u00a0[cs.CV]"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00140"},{"key":"e_1_3_2_1_14_1","unstructured":"Forrest\u00a0N Iandola Song Han Matthew\u00a0W Moskewicz Khalid Ashraf William\u00a0J Dally and Kurt Keutzer. 2016. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and < 0.5 MB model size. arXiv preprint arXiv:1602.07360(2016)."},{"key":"e_1_3_2_1_15_1","volume-title":"Proc. Workshop ML Syst. NeurIPS. 1\u20136.","author":"Kaufman Samuel","year":"2019","unstructured":"Samuel Kaufman, Phitchaya\u00a0Mangpo Phothilimthana, and Mike Burrows. 2019. Learned TPU cost model for XLA tensor programs. In Proc. Workshop ML Syst. NeurIPS. 1\u20136."},{"key":"e_1_3_2_1_16_1","unstructured":"Samuel\u00a0J Kaufman Phitchaya\u00a0Mangpo Phothilimthana Yanqi Zhou Charith Mendis Sudip Roy Amit Sabne and Mike Burrows. 2020. A Learned Performance Model for Tensor Processing Units. arXiv preprint arXiv:2008.01040(2020)."},{"key":"e_1_3_2_1_17_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980(2014).","author":"Kingma P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980(2014)."},{"key":"e_1_3_2_1_18_1","volume-title":"Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey\u00a0E Hinton. 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25 (2012)."},{"key":"e_1_3_2_1_19_1","volume-title":"Hw-nas-bench: Hardware-aware neural architecture search benchmark. arXiv preprint arXiv:2103.10584(2021).","author":"Li Chaojian","year":"2021","unstructured":"Chaojian Li, Zhongzhi Yu, Yonggan Fu, Yongan Zhang, Yang Zhao, Haoran You, Qixuan Yu, Yue Wang, and Yingyan Lin. 2021. Hw-nas-bench: Hardware-aware neural architecture search benchmark. arXiv preprint arXiv:2103.10584(2021)."},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=POWv6hDd9XH","author":"Li Yuhang","year":"2021","unstructured":"Yuhang Li, Ruihao Gong, Xu Tan, Yang Yang, Peng Hu, Qi Zhang, Fengwei Yu, Wei Wang, and Shi Gu. 2021. BRECQ: Pushing the Limit of Post-Training Quantization by Block Reconstruction. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=POWv6hDd9XH"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Yuhang Li Mingzhu Shen Jian Ma Yan Ren Mingxin Zhao Qi Zhang Ruihao Gong Fengwei Yu and Junjie Yan. 2021. MQBench: Towards Reproducible and Deployable Model Quantization Benchmark. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1). https:\/\/openreview.net\/forum?id=TUplOmF8DsM","DOI":"10.1109\/IJCNN52387.2021.9533785"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2019.8875654"},{"key":"e_1_3_2_1_23_1","first-page":"11711","article-title":"Mcunet: Tiny deep learning on iot devices","volume":"33","author":"Lin Ji","year":"2020","unstructured":"Ji Lin, Wei-Ming Chen, Yujun Lin, Chuang Gan, Song Han, 2020. Mcunet: Tiny deep learning on iot devices. Advances in Neural Information Processing Systems 33 (2020), 11711\u201311722.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","unstructured":"Tsung-Yi Lin Priya Goyal Ross Girshick Kaiming He and Piotr Doll\u00e1r. 2018. Focal Loss for Dense Object Detection. arxiv:1708.02002\u00a0[cs.CV]"},{"key":"e_1_3_2_1_25_1","volume-title":"Nvidia tensor core programmability, performance & precision. In 2018 IEEE international parallel and distributed processing symposium workshops (IPDPSW)","author":"Markidis Stefano","unstructured":"Stefano Markidis, Steven\u00a0Wei Der\u00a0Chien, Erwin Laure, Ivy\u00a0Bo Peng, and Jeffrey\u00a0S Vetter. 2018. Nvidia tensor core programmability, performance & precision. In 2018 IEEE international parallel and distributed processing symposium workshops (IPDPSW). IEEE, 522\u2013531."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3058217"},{"key":"e_1_3_2_1_27_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019), 8026\u20138037."},{"key":"e_1_3_2_1_28_1","volume-title":"LETI: Latency Estimation Tool and Investigation of Neural Networks inference on Mobile GPU. arXiv preprint arXiv:2010.02871(2020).","author":"Ponomarev Evgeny","year":"2020","unstructured":"Evgeny Ponomarev, Sergey Matveev, and Ivan Oseledets. 2020. LETI: Latency Estimation Tool and Investigation of Neural Networks inference on Mobile GPU. arXiv preprint arXiv:2010.02871(2020)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107281"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"e_1_3_2_1_31_1","unstructured":"Shaoqing Ren Kaiming He Ross Girshick and Jian Sun. 2016. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. arxiv:1506.01497\u00a0[cs.CV]"},{"key":"e_1_3_2_1_32_1","unstructured":"Jaehun Ryu and Hyojin Sung. 2021. MetaTune: Meta-Learning Based Cost Model for Fast and Efficient Auto-tuning Frameworks. arxiv:2102.04199\u00a0[cs.LG]"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00256"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Mingzhu Shen Feng Liang Ruihao Gong Yuhang Li Chuming Li Chen Lin Fengwei Yu Junjie Yan and Wanli Ouyang. 2021. Once Quantization-Aware Training: High Performance Extremely Low-bit Architecture Search. arxiv:2010.04354\u00a0[cs.CV]","DOI":"10.1109\/ICCV48922.2021.00529"},{"key":"e_1_3_2_1_36_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556(2014)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00293"},{"key":"e_1_3_2_1_39_1","volume-title":"International conference on machine learning. PMLR, 6105\u20136114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning. PMLR, 6105\u20136114."},{"key":"e_1_3_2_1_40_1","volume-title":"QDrop: Randomly Dropping Quantization for Extremely Low-bit Post-Training Quantization. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=ySQH0oDyp7","author":"Wei Xiuying","year":"2022","unstructured":"Xiuying Wei, Ruihao Gong, Yuhang Li, Xianglong Liu, and Fengwei Yu. 2022. QDrop: Randomly Dropping Quantization for Extremely Low-bit Post-Training Quantization. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=ySQH0oDyp7"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458864.3467882"},{"key":"e_1_3_2_1_42_1","unstructured":"Lianmin Zheng Ruochen Liu Junru Shao Tianqi Chen Joseph\u00a0E. Gonzalez Ion Stoica and Ameer\u00a0Haj Ali. 2021. TenSet: A Large-scale Program Performance Dataset for Learned Tensor Compilers. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1). https:\/\/openreview.net\/forum?id=aIfp8kLuvc9"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00204"}],"event":{"name":"ICPP '22: 51st International Conference on Parallel Processing","location":"Bordeaux France","acronym":"ICPP '22"},"container-title":["Proceedings of the 51st International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3545008.3545051","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3545008.3545051","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:44Z","timestamp":1750186964000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3545008.3545051"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,29]]},"references-count":42,"alternative-id":["10.1145\/3545008.3545051","10.1145\/3545008"],"URL":"https:\/\/doi.org\/10.1145\/3545008.3545051","relation":{},"subject":[],"published":{"date-parts":[[2022,8,29]]},"assertion":[{"value":"2023-01-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}