{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T07:57:20Z","timestamp":1726041440832},"publisher-location":"Cham","reference-count":36,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030296100"},{"type":"electronic","value":"9783030296117"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-29611-7_2","type":"book-chapter","created":{"date-parts":[[2019,8,12]],"date-time":"2019-08-12T23:02:59Z","timestamp":1565650979000},"page":"15-26","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Compiling Optimization for Neural Network Accelerators"],"prefix":"10.1007","author":[{"given":"Jin","family":"Song","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yimin","family":"Zhuang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaobing","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tian","family":"Zhi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaoli","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,8,9]]},"reference":[{"key":"2_CR1","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: International Conference on Neural Information Processing Systems, pp. 1097\u20131105. Curran Associates Inc. (2012)"},{"key":"2_CR2","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., et al.: Deep residual learning for image recognition, pp. 770\u2013778 (2015)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, X., Lin, M., et al.: ShuffleNet: an extremely efficient convolutional neural network for mobile devices (2017)","DOI":"10.1109\/CVPR.2018.00716"},{"key":"2_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"W Liu","year":"2016","unstructured":"Liu, W., et al.: SSD: single shot MultiBox detector. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 21\u201337. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2"},{"key":"2_CR5","unstructured":"Ren, S., He, K., Girshick, R., et al.: Faster R-CNN: towards real-time object detection with region proposal networks. In: International Conference on Neural Information Processing Systems, pp. 91\u201399. MIT Press (2015)"},{"key":"2_CR6","doi-asserted-by":"crossref","unstructured":"Sak, H., Senior, A., Beaufays, F.: Long short-term memory recurrent neural network architectures for large scale acoustic modeling. Comput. Sci. 338\u2013342 (2014)","DOI":"10.21437\/Interspeech.2014-80"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Graves, A., Jaitly, N., Mohamed, A.R.: Hybrid speech recognition with deep bidirectional LSTM. In: Automatic Speech Recognition and Understanding, pp. 273\u2013278. IEEE (2014)","DOI":"10.1109\/ASRU.2013.6707742"},{"issue":"7676","key":"2_CR8","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., Schrittwieser, J., Simonyan, K., et al.: Mastering the game of Go without human knowledge. Nature 550(7676), 354\u2013359 (2017)","journal-title":"Nature"},{"issue":"7587","key":"2_CR9","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., Huang, A., Maddison, C.J., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"2_CR10","unstructured":"OpenAI Five Homepage. https:\/\/blog.openai.com\/openai-five\/"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Venkatesh, G., Nurvitadhi, E., Marr, D.: Accelerating deep convolutional networks using low-precision and sparsity (2016)","DOI":"10.1109\/ICASSP.2017.7952679"},{"key":"2_CR12","unstructured":"Ovtcharov, K., Ruwase, O., Kim, J., et al.: Accelerating deep convolutional neural networks using specialized hardware. Miscellaneous (2015)"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Han, S., Liu, X., Mao, H., et al.: EIE: efficient inference engine on compressed deep neural network. In: International Symposium on Computer Architecture, pp. 243\u2013254. IEEE Press (2016)","DOI":"10.1109\/ISCA.2016.30"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Zhang, C., Li, P., Sun, G., et al.: Optimizing FPGA-based accelerator design for deep convolutional neural networks. In: ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays, pp. 161\u2013170. ACM (2015)","DOI":"10.1145\/2684746.2689060"},{"issue":"2","key":"2_CR15","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1145\/3140659.3080254","volume":"45","author":"Angshuman Parashar","year":"2017","unstructured":"Parashar, A., Rhu, M., Mukkara, A., et al.: SCNN: an accelerator for compressed-sparse convolutional neural networks, pp. 27\u201340 (2017)","journal-title":"ACM SIGARCH Computer Architecture News"},{"issue":"4","key":"2_CR16","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1145\/2644865.2541967","volume":"49","author":"T Chen","year":"2014","unstructured":"Chen, T., Du, Z., Sun, N.: DianNao: a small-footprint high-throughput accelerator for ubiquitous machine-learning. ACM SIGPLAN Not. 49(4), 269\u2013284 (2014)","journal-title":"ACM SIGPLAN Not."},{"issue":"11","key":"2_CR17","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1145\/2996864","volume":"59","author":"Y Chen","year":"2016","unstructured":"Chen, Y., Chen, T., Xu, Z.: DianNao family: energy-efficient hardware accelerators for machine learning. Commun. ACM 59(11), 105\u2013112 (2016)","journal-title":"Commun. ACM"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Zhang, S., Du, Z., Zhang, L., et al.: Cambricon-X: an accelerator for sparse neural networks. In: 2016 49th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). IEEE Computer Society (2016)","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Liu, S., Du, Z., Tao, J., et al.: Cambricon: an instruction set architecture for neural networks. In: ACM\/IEEE International Symposium on Computer Architecture, pp. 393\u2013405. IEEE (2016)","DOI":"10.1145\/3007787.3001179"},{"key":"2_CR20","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y Lecun","year":"1998","unstructured":"Lecun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86, 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"key":"2_CR21","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. Comput. Sci. (2014)"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Liu, W., Jia, Y., et al.: Going deeper with convolutions (2014)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Ioffe, S., Vanhoucke, V., et al.: Inception-v4, Inception-ResNet and the impact of residual connections on learning (2016)","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"2_CR24","unstructured":"Yu, F., Koltun, V.: Multi-scale context aggregation by dilated convolutions (2015)"},{"key":"2_CR25","unstructured":"Howard, A.G., Zhu, M., Chen, B., et al.: MobileNets: efficient convolutional neural networks for mobile vision applications (2017)"},{"key":"2_CR26","unstructured":"Iandola, F.N., Han, S., Moskewicz, M.W., et al.: SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5\u00a0MB model size (2016)"},{"key":"2_CR27","unstructured":"Abadi, M., Agarwal, A., Barham, P., et al.: TensorFlow: large-scale machine learning on heterogeneous distributed systems (2016)"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Jia, Y., Shelhamer, E., et al.: Caffe: convolutional architecture for fast feature embedding, pp. 675\u2013678 (2014)","DOI":"10.1145\/2647868.2654889"},{"key":"2_CR29","unstructured":"Chen, T., Li, M., Li, Y., et al.: MXNet: a flexible and efficient machine learning library for heterogeneous distributed systems. Statistics (2015)"},{"issue":"3","key":"2_CR30","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1145\/212094.212131","volume":"27","author":"VH Allan","year":"1995","unstructured":"Allan, V.H., Jones, R.B., Lee, R.M., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367\u2013432 (1995)","journal-title":"ACM Comput. Surv."},{"key":"2_CR31","unstructured":"Gray, A., Gottbrath, C., Olson, R., Prasanna, S., et al.: Production deep learning with NVIDIA GPU inference engine. https:\/\/devblogs.nvidia.com\/production-deep-learning-nvidia-gpu-inference-engine\/"},{"issue":"3","key":"2_CR32","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vis. 115(3), 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vis."},{"issue":"6","key":"2_CR33","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1145\/2980983.2908105","volume":"51","author":"Leonard Truong","year":"2016","unstructured":"Truong, L., Barik, R., Totoni, E., et al.: Latte: a language, compiler, and runtime for elegant and efficient deep neural networks. In: ACM SIGPLAN Conference on Programming Language Design and Implementation, pp. 209\u2013223. ACM (2016)","journal-title":"ACM SIGPLAN Notices"},{"key":"2_CR34","unstructured":"Chen, T., Moreau, T., Jiang, Z., et al.: TVM: an automated end-to-end optimizing compiler for deep learning (2018)"},{"issue":"1","key":"2_CR35","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1145\/3150211","volume":"61","author":"J Ragankelley","year":"2018","unstructured":"Ragankelley, J., Adams, A., Sharlet, D., et al.: Halide: decoupling algorithms from schedules for high-performance image processing. Commun. ACM 61(1), 106\u2013115 (2018)","journal-title":"Commun. ACM"},{"key":"2_CR36","unstructured":"Cyphers, S., Bansal, A.K., Bhiwandiwalla, A., et al.: Intel nGraph: an intermediate representation, compiler, and executor for deep learning (2018)"}],"container-title":["Lecture Notes in Computer Science","Advanced Parallel Processing Technologies"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-29611-7_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,25]],"date-time":"2022-09-25T14:06:32Z","timestamp":1664114792000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-29611-7_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030296100","9783030296117"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-29611-7_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"9 August 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"APPT","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Advanced Parallel Processing Technologies","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 August 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 August 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"appt2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tc.ccf.org.cn\/tcarch\/appt2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}