{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T07:20:02Z","timestamp":1742973602473,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":18,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811527661"},{"type":"electronic","value":"9789811527678"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-981-15-2767-8_42","type":"book-chapter","created":{"date-parts":[[2020,1,25]],"date-time":"2020-01-25T15:02:35Z","timestamp":1579964555000},"page":"479-491","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Minimizing Off-Chip Memory Access for Deep Convolutional Neural Network Training"],"prefix":"10.1007","author":[{"given":"Jijun","family":"Wang","sequence":"first","affiliation":[]},{"given":"Hongliang","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,1,26]]},"reference":[{"issue":"4","key":"42_CR1","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.neunet.2014.09.003","volume":"61","author":"J Schmidhuber","year":"2015","unstructured":"Schmidhuber, J.: Deep learning in neural networks: an overview. Neural Netw. 61(4), 85\u2013117 (2015)","journal-title":"Neural Netw."},{"unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: Proceedings of the 32nd International Conference on Machine Learning, pp. 448\u2013456. IEEE, Lile (2015)","key":"42_CR2"},{"issue":"2","key":"42_CR3","first-page":"770","volume":"53","author":"K He","year":"2016","unstructured":"He, K., Zhang, X., Ren, S., et al.: Deep residual learning for image recognition. Comput. Vis. Pattern Recognit. 53(2), 770\u2013778 (2016)","journal-title":"Comput. Vis. Pattern Recognit."},{"doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., et al.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826. IEEE, Las Vegas (2016)","key":"42_CR4","DOI":"10.1109\/CVPR.2016.308"},{"doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der Maaten, L., et al.: Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4700\u20134708. IEEE, Honolulu (2017)","key":"42_CR5","DOI":"10.1109\/CVPR.2017.243"},{"issue":"2","key":"42_CR6","first-page":"125","volume":"53","author":"J Redmon","year":"2018","unstructured":"Redmon, J., Farhadi, A.: YOLOv3: an incremental improvement. Comput. Vis. Pattern Recognit. 53(2), 125\u2013136 (2018)","journal-title":"Comput. Vis. Pattern Recognit."},{"unstructured":"Narang, S., Diamos, G., Elsen, E., et al.: Mixed precision training[OL], 25 December 2018. \nhttps:\/\/arxiv.org\/pdf\/1710.03740.pdf","key":"42_CR7"},{"unstructured":"NVIDIA TESLA V100 GPU architecture. The world\u2019s most advanced data center GPU[EB\/OL], 10 October 2018. \nhttps:\/\/devblogs.nvidia.com\/inside-volta\/","key":"42_CR8"},{"unstructured":"NVIDIA TESLA P100. 
the most advanced datacenter accelerator ever built featuring Pascal GP100[OL], 7 June 2018. \nhttps:\/\/www.nvidia.com\/o-bject\/pascal-architecture-whitepaper.html","key":"42_CR9"},{"issue":"4","key":"42_CR10","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: an insightful visual performance model for floating-point programs and multicore architectures. Commun. ACM 52(4), 65\u201376 (2009)","journal-title":"Commun. ACM"},{"unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, pp. 1097\u20131105. IEEE, Lake Tahoe (2012)","key":"42_CR11"},{"unstructured":"Google Inc.: TPUv2[OL], 7 January 2019. \nhttps:\/\/www.tomshardware.com\/ne-ws\/tpu-v2-google-machine-learning-35370.html","key":"42_CR12"},{"doi-asserted-by":"crossref","unstructured":"Li, J., Yan, G., Lu, W., et al.: TNPU: an efficient accelerator architecture for training convolutional neural networks. In: Proceedings of the 24th Asia and South Pacific Design Automation Conference, pp. 450\u2013455. ACM, Tokyo (2019)","key":"42_CR13","DOI":"10.1145\/3287624.3287641"},{"doi-asserted-by":"crossref","unstructured":"Chen, Y., Luo, T., Liu, S., et al.: Dadiannao: a machine-learning supercomputer. In: Proceedings of the 47th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 609\u2013622. IEEE, Cambridge (2014)","key":"42_CR14","DOI":"10.1109\/MICRO.2014.58"},{"doi-asserted-by":"crossref","unstructured":"Shen, Y., Ferdman, M., Milder, P.: Escher: a CNN accelerator with flexible buffering to minimize off-chip transfer. In: 2017 IEEE 25th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM), pp. 93\u2013100. IEEE, Napa (2017)","key":"42_CR15","DOI":"10.1109\/FCCM.2017.47"},{"doi-asserted-by":"crossref","unstructured":"Li, J., Yan, G., Lu, W., et al.: SmartShuttle: optimizing off-chip memory accesses for deep learning accelerators. In: 2018 Design, Automation & Test in Europe Conference & Exhibition (DATE), pp. 343\u2013348. IEEE, Dresden (2018)","key":"42_CR16","DOI":"10.23919\/DATE.2018.8342033"},{"unstructured":"Chen, T., Xu, B., Zhang, C., et al.: Training deep nets with sublinear memory cost[OL], 5 January 2019. \nhttps:\/\/arxiv.org\/pdf\/1604.06174.pdf","key":"42_CR17"},{"doi-asserted-by":"crossref","unstructured":"Jain, A., Phanishayee, A., Mars, J., et al.: Gist: efficient data encoding for deep neural network training. In: 2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA), pp. 776\u2013789. 
IEEE, Los Angeles (2018)","key":"42_CR18","DOI":"10.1109\/ISCA.2018.00070"}],"container-title":["Communications in Computer and Information Science","Parallel Architectures, Algorithms and Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-15-2767-8_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,1,26]],"date-time":"2020-01-26T01:02:39Z","timestamp":1580000559000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-981-15-2767-8_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9789811527661","9789811527678"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-981-15-2767-8_42","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"26 January 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Parallel Architectures, Algorithms and Programming","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 December 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"paap2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/sdcs.sysu.edu.cn\/paap2019","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"121","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"39","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers 
Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"8","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"32% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}