{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T23:42:19Z","timestamp":1740181339029,"version":"3.37.3"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s42514-022-00105-z","type":"journal-article","created":{"date-parts":[[2022,6,7]],"date-time":"2022-06-07T06:02:53Z","timestamp":1654581773000},"page":"87-103","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A systematic study on benchmarking AI inference accelerators"],"prefix":"10.1007","volume":"4","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0632-7402","authenticated-orcid":false,"given":"Zihan","family":"Jiang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiansong","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fangxin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wanling","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chuanxin","family":"Lan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fei","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,6,7]]},"reference":[{"key":"105_CR1","unstructured":"AnandTech: https:\/\/www.anandtech.com\/show\/12815\/cambricon-makers-of-huaweis-kirin-npu-ip-build-a-big-ai-chip-and-pcie-card, (2018)"},{"key":"105_CR2","unstructured":"Cambricon: Cambricon cnrt. http:\/\/www.cambricon.com\/index.php?m=content&c=index&a=lists&catid=71"},{"key":"105_CR3","unstructured":"Cambricon MLU100, http:\/\/www.cambricon.com\/index.php?c=page&id=20"},{"key":"105_CR4","unstructured":"Chen, W. et al.: Compressing neural networks with the hashing trick. In: Proceedings of the International Conference on Machine Learning, pp. 2285\u20132294 (2015)"},{"issue":"4","key":"105_CR5","first-page":"269","volume":"49","author":"T Chen","year":"2014","unstructured":"Chen, T., et al.: Diannao: A small-footprint high-throughput accelerator for ubiquitous machine-learning. ACM ASPLOS 49(4), 269\u2013284 (2014)","journal-title":"ACM ASPLOS"},{"key":"105_CR6","unstructured":"Courbariaux, et al. (2015) Binaryconnect: Training deep neural networks with binary weights during propagations. In: NeurIPS, pp. 3123\u20133131"},{"key":"105_CR7","unstructured":"DeepBench, https:\/\/github.com\/baidu-research\/DeepBench"},{"key":"105_CR8","first-page":"2148","volume":"26","author":"M Denil","year":"2013","unstructured":"Denil, M., et al.: Predicting parameters in deep learning. Adv Neural Inform Process Syst 26, 2148\u20132156 (2013)","journal-title":"Adv Neural Inform Process Syst"},{"key":"105_CR9","unstructured":"Dean J et al. (2012) Large scale distributed deep networks. In: Advances in Neural Information Processing Systems 25. Curran Associates, Inc., pp. 1223\u20131231"},{"key":"105_CR10","doi-asserted-by":"crossref","unstructured":"Deng J, et al. (2009) Imagenet: A large-scale hierarchical image database. In: IEEE conference on computer vision and pattern recognition, pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"105_CR11","unstructured":"Everingham, M. et al. (2012) The PASCAL Visual Object Classes Challenge 2012 (VOC2012) Results"},{"key":"105_CR12","unstructured":"Google: Edge-tpu. https:\/\/cloud.google.com\/edge-tpu"},{"key":"105_CR13","unstructured":"Google: What Makes TPU Fine Tuned to Deep Learning. https:\/\/cloud.google.com\/blog\/products\/ai-machine-learning\/what-makes-tpus-fine-tuned-for-deep-learning"},{"key":"105_CR14","doi-asserted-by":"crossref","unstructured":"Gray J (1993) Database and transaction processing performance handbook","DOI":"10.1145\/130283.130288"},{"key":"105_CR15","doi-asserted-by":"crossref","unstructured":"Hao T et al. (2018) Edge AIBench: towards comprehensive end-to-end edge computing benchmarking. International Symposium on Benchmarking, Measuring and Optimization, Springer, Cham, pp. 23-30","DOI":"10.1007\/978-3-030-32813-9_3"},{"key":"105_CR16","unstructured":"Han S, Mao H, Dally WJ (2016) Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. ICLR"},{"issue":"2","key":"105_CR17","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1145\/3282307","volume":"62","author":"JL Hennessy","year":"2019","unstructured":"Hennessy, J.L., Patterson, D.A.: A new golden age for computer architecture. Commun ACM 62(2), 48\u201360 (2019)","journal-title":"Commun ACM"},{"key":"105_CR18","unstructured":"He K et al. (2015) Deep residual learning for image recognition. CoRR, vol. abs\/1512.03385"},{"key":"105_CR19","unstructured":"Huawei: Huawei Ascend 310 Accelerator. http:\/\/ascend.huawei.com (2020)"},{"key":"105_CR20","unstructured":"Huang G et al. (2016) Densely connected convolutional networks. CoRR, vol. abs\/1608.06993"},{"key":"105_CR21","unstructured":"Howard AG et al. (2017) Mobilenets: Efficient convolutional neural networks for mobile vision applications. CoRR, vol. abs\/1704.04861"},{"key":"105_CR22","unstructured":"Iandola FN et al. (2016) Squeezenet: Alexnet-level accuracy with 50x fewer parameters and $$<$$1mb model size. CoRR, vol. abs\/1602.07360"},{"key":"105_CR23","first-page":"112","volume":"2","author":"S Jain","year":"2020","unstructured":"Jain, S., et al.: Trained quantization thresholds for accurate and efficient fixed-point inference of deep neural networks. Proc Mach Learn Syst 2, 112\u2013128 (2020)","journal-title":"Proc Mach Learn Syst"},{"key":"105_CR24","doi-asserted-by":"crossref","unstructured":"Jiang Z et al. (2021) Hpc ai500 v2. 0: The methodology, tools, and metrics for benchmarking hpc ai systems. IEEE CLUSTER","DOI":"10.1109\/Cluster48925.2021.00022"},{"key":"105_CR25","unstructured":"Jouppi, N.P. et al.: In-datacenter performance analysis of a tensor processing unit. In: ACM\/IEEE ISCA. IEEE, pp. 1\u201312 (2017)"},{"key":"105_CR26","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems, pp. 1097\u20131105"},{"key":"105_CR27","unstructured":"Lee D, Kim B (2018) Retraining-based iterative weight quantization for deep neural networks. CoRR, vol. abs\/1805.11233"},{"key":"105_CR28","doi-asserted-by":"crossref","unstructured":"Li J. et\u00a0al.: Characterizing the i\/o pipeline in the deployment of cnns on commercial accelerators. IEEE Intl Conf on Parallel & Distributed Processing with Applications, Big Data & Cloud Computing, Sustainable Computing & Communications, Social Computing & Networking. IEEE, pp. 137-144 (2020)","DOI":"10.1109\/ISPA-BDCloud-SocialCom-SustainCom51426.2020.00043"},{"issue":"3","key":"105_CR29","first-page":"393","volume":"44","author":"S Liu","year":"2016","unstructured":"Liu, S., et al.: Cambricon: an instruction set architecture for neural networks. ACM\/IEEE ISCA 44(3), 393\u2013405 (2016)","journal-title":"ACM\/IEEE ISCA"},{"key":"105_CR30","unstructured":"Liu W et al.: Ssd: single shot multibox detector. (2016), to appear. [Online]. http:\/\/arxiv.org\/abs\/1512.02325"},{"key":"105_CR31","doi-asserted-by":"crossref","unstructured":"Luo C et al. (2018) AIoT bench: towards comprehensive benchmarking mobile and embedded device intelligence. International Symposium on Benchmarking, pp. 31\u201335. Springer, Cham, Measuring and Optimization","DOI":"10.1007\/978-3-030-32813-9_4"},{"key":"105_CR32","unstructured":"Ma X et al. (2019) PCONV: the missing but desirable sparsity in DNN weight pruning for real-time execution on mobile devices. CoRR, vol. abs\/1909.05073"},{"key":"105_CR33","unstructured":"Mishra R et al. (2020) A Survey on Deep Neural Network Compression: Challenges, Overview, and Solutions. CoRR, vol. abs\/2010.03954"},{"key":"105_CR34","doi-asserted-by":"crossref","unstructured":"Mittal D et al. (2018)ecovering from random pruning: On the plasticity of deep convolutional neural networks. CoRR, vol. abs\/1801.10447","DOI":"10.1109\/WACV.2018.00098"},{"key":"105_CR35","doi-asserted-by":"crossref","unstructured":"Niu W et al. (2020) Patdnn: Achieving real-time dnn execution on mobile devices with pattern-based weight pruning. In: ACM ASPLOS, pp. 907\u2013922","DOI":"10.1145\/3373376.3378534"},{"key":"105_CR36","doi-asserted-by":"crossref","unstructured":"Reddi VJ et al. (2020) Mlperf inference benchmark. In: ACM\/IEEE ISCA, pp. 446\u2013459","DOI":"10.1109\/ISCA45697.2020.00045"},{"issue":"3","key":"105_CR37","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1109\/MSSC.2020.3002140","volume":"12","author":"V Sze","year":"2020","unstructured":"Sze, V., et al.: How to evaluate deep neural network processors: Tops\/w (alone) considered harmful. IEEE Solid-State Circ Mag 12(3), 28\u201341 (2020)","journal-title":"IEEE Solid-State Circ Mag"},{"key":"105_CR38","doi-asserted-by":"crossref","unstructured":"Tang F et al. (2021) AIBench Training: Balanced Industry-Standard AI Training Benchmarking. IEEE Computer Society, In IEEE ISPASS","DOI":"10.1109\/ISPASS51385.2021.00014"},{"issue":"1","key":"105_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11390-018-1805-8","volume":"33","author":"J-H Tao","year":"2018","unstructured":"Tao, J.-H., et al.: Bench ip: Bencharking intelligence processors. J Comput Sci Technol 33(1), 1\u201323 (2018)","journal-title":"J Comput Sci Technol"},{"key":"105_CR40","doi-asserted-by":"crossref","unstructured":"Turner J et al. (2018) Characterising across-stack optimisations for deep convolutional neural networks. In: IISWC, pp 101\u2013110","DOI":"10.1109\/IISWC.2018.8573503"},{"key":"105_CR41","unstructured":"Wang Y et al. A systematic methodology for analysis of deep learning hardware and software platforms. In: Proceedings of Machine Learning and Systems"},{"issue":"4","key":"105_CR42","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., et al.: Roofline: an insightful visual performance model for multicore architectures. Commun ACM 52(4), 65\u201376 (2009)","journal-title":"Commun ACM"},{"key":"105_CR43","first-page":"7543","volume":"97","author":"R Zhao","year":"2019","unstructured":"Zhao, R., et al.: Improving neural network quantization without retraining using outlier channel splitting. Ser Proc Mach Learn Res 97, 7543\u20137552 (2019). (PMLR)","journal-title":"Ser Proc Mach Learn Res"},{"key":"105_CR44","unstructured":"Zhou A, Yao A, Guo Y, Xu L, Chen Y Incremental network quantization: Towards lossless cnns with low-precision weights. CoRR, vol. abs\/1702.03044, (2017). [Online]. http:\/\/arxiv.org\/abs\/1702.03044"}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-022-00105-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-022-00105-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-022-00105-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,9]],"date-time":"2022-09-09T11:44:51Z","timestamp":1662723891000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-022-00105-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6]]},"references-count":44,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["105"],"URL":"https:\/\/doi.org\/10.1007\/s42514-022-00105-z","relation":{},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"type":"print","value":"2524-4922"},{"type":"electronic","value":"2524-4930"}],"subject":[],"published":{"date-parts":[[2022,6]]},"assertion":[{"value":"23 November 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 April 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 June 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}