{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T15:19:45Z","timestamp":1774365585292,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,2,10]],"date-time":"2018-02-10T00:00:00Z","timestamp":1518220800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"VMware"},{"name":"Mellanox"},{"name":"Oracle"},{"name":"Natural Science Foundation of China","award":["61572111"],"award-info":[{"award-number":["61572111"]}]},{"name":"Google"},{"name":"DARPA","award":["16-43-D3M-FP-040"],"award-info":[{"award-number":["16-43-D3M-FP-040"]}]},{"name":"Central Universities of China","award":["ZYGX2016Z003"],"award-info":[{"award-number":["ZYGX2016Z003"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,2,10]]},"DOI":"10.1145\/3178487.3178491","type":"proceedings-article","created":{"date-parts":[[2018,2,6]],"date-time":"2018-02-06T13:12:23Z","timestamp":1517922743000},"page":"41-53","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":173,"title":["Superneurons"],"prefix":"10.1145","author":[{"given":"Linnan","family":"Wang","sequence":"first","affiliation":[{"name":"Brown University"}]},{"given":"Jinmian","family":"Ye","sequence":"additional","affiliation":[{"name":"Univ. of Electr. Sci. &amp; Tech. of China"}]},{"given":"Yiyang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Univ. of Electr. Sci. &amp; Tech. 
of China"}]},{"given":"Wei","family":"Wu","sequence":"additional","affiliation":[{"name":"Los Alamos National Laboratory"}]},{"given":"Ang","family":"Li","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory"}]},{"given":"Shuaiwen Leon","family":"Song","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory"}]},{"given":"Zenglin","family":"Xu","sequence":"additional","affiliation":[{"name":"Univ. of Electr. Sci. &amp; Tech. of China"}]},{"given":"Tim","family":"Kraska","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology and Brown University"}]}],"member":"320","published-online":{"date-parts":[[2018,2,10]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Mxnet's graph representation of neural networks. http:\/\/mxnet.io\/architecture\/note_memory.html."},{"key":"e_1_3_2_1_2_1","first-page":"265","volume-title":"OSDI","volume":"16","author":"Abadi M.","year":"2016","unstructured":"M. Abadi, P. Barham, J. Chen, Z. Chen, A. Davis, J. Dean, M. Devin, S. Ghemawat, G. Irving, M. Isard, et al. TensorFlow: A system for large-scale machine learning. In OSDI, volume 16, pages 265--283, 2016."},{"key":"e_1_3_2_1_3_1","volume-title":"Comparative study of caffe, neon, theano, and torch for deep learning","author":"Bahrampour S.","year":"2016","unstructured":"S. Bahrampour, N. Ramakrishnan, L. Schott, and M. Shah. Comparative study of caffe, neon, theano, and torch for deep learning. 
2016."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/72.279181"},{"key":"e_1_3_2_1_5_1","volume-title":"Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274","author":"Chen T.","year":"2015","unstructured":"T. Chen, M. Li, Y. Li, M. Lin, N. Wang, M. Wang, T. Xiao, B. Xu, C. Zhang, and Z. Zhang. Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274, 2015."},{"key":"e_1_3_2_1_6_1","volume-title":"Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174","author":"Chen T.","year":"2016","unstructured":"T. Chen, B. Xu, C. Zhang, and C. Guestrin. Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174, 2016."},{"key":"e_1_3_2_1_7_1","volume-title":"cudnn: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759","author":"Chetlur S.","year":"2014","unstructured":"S. Chetlur, C. Woolley, P. Vandermersch, J. Cohen, J. Tran, B. Catanzaro, and E. Shelhamer. cudnn: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759, 2014."},{"key":"e_1_3_2_1_8_1","first-page":"1337","volume-title":"International Conference on Machine Learning","author":"Coates A.","year":"2013","unstructured":"A. Coates, B. Huval, T. Wang, D. Wu, B. Catanzaro, and N. Andrew. 
Deep learning with cots hpc systems. In International Conference on Machine Learning, pages 1337--1345, 2013."},{"key":"e_1_3_2_1_9_1","volume-title":"Idiap","author":"Collobert R.","year":"2002","unstructured":"R. Collobert, S. Bengio, and J. Mari\u00e9thoz. Torch: a modular machine learning software library. Technical report, Idiap, 2002."},{"key":"e_1_3_2_1_10_1","first-page":"1223","volume-title":"Advances in neural information processing systems","author":"Dean J.","year":"2012","unstructured":"J. Dean, G. Corrado, R. Monga, K. Chen, M. Devin, M. Mao, A. Senior, P. Tucker, K. Yang, Q. V. Le, et al. Large scale distributed deep networks. In Advances in neural information processing systems, pages 1223--1231, 2012."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.30"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_13_1","volume-title":"Densely connected convolutional networks. arXiv preprint arXiv:1608.06993","author":"Huang G.","year":"2016","unstructured":"G. Huang, Z. Liu, K. Q. Weinberger, and L. van der Maaten. Densely connected convolutional networks. 
arXiv preprint arXiv:1608.06993, 2016."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2925426.2926294"},{"key":"e_1_3_2_1_16_1","first-page":"1097","volume-title":"Advances in neural information processing systems","author":"Krizhevsky A.","year":"2012","unstructured":"A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems, pages 1097--1105, 2012."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_18_1","volume-title":"Memory-efficient implementation of densenets. arXiv preprint arXiv:1707.06990","author":"Pleiss G.","year":"2017","unstructured":"G. Pleiss, D. Chen, G. Huang, T. Li, L. van der Maaten, and K. Q. Weinberger. Memory-efficient implementation of densenets. arXiv preprint arXiv:1707.06990, 2017."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783721"},{"key":"e_1_3_2_1_20_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan K.","year":"2014","unstructured":"K. Simonyan and A. Zisserman. Very deep convolutional networks for large-scale image recognition. 
arXiv preprint arXiv:1409.1556, 2014."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/3298023.3298188"},{"key":"e_1_3_2_1_23_1","first-page":"4","volume-title":"Proc. Deep Learning and Unsupervised Feature Learning NIPS Workshop","volume":"1","author":"Vanhoucke V.","year":"2011","unstructured":"V. Vanhoucke, A. Senior, and M. Z. Mao. Improving the speed of neural networks on cpus. In Proc. Deep Learning and Unsupervised Feature Learning NIPS Workshop, volume 1, page 4, 2011."},{"key":"e_1_3_2_1_24_1","volume-title":"Efficient communications in training large scale neural networks. arXiv preprint arXiv:1611.04255","author":"Wang L.","year":"2016","unstructured":"L. Wang, W. Wu, G. Bosilca, R. Vuduc, and Z. Xu. Efficient communications in training large scale neural networks. arXiv preprint arXiv:1611.04255, 2016."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2925426.2926256"},{"key":"e_1_3_2_1_26_1","volume-title":"Neural Networks","author":"Wang L.","year":"2017","unstructured":"L. Wang, Y. Yang, R. Min, and S. Chakradhar. Accelerating deep neural network training with inconsistent stochastic gradient descent. Neural Networks, 2017."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.56"},{"key":"e_1_3_2_1_28_1","volume-title":"Spark: Cluster computing with working sets. 
Hot-Cloud, 10(10--10):95","author":"Zaharia M.","year":"2010","unstructured":"M. Zaharia, M. Chowdhury, M. J. Franklin, S. Shenker, and I. Stoica. Spark: Cluster computing with working sets. Hot-Cloud, 10(10--10):95, 2010."}],"event":{"name":"PPoPP '18: 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","location":"Vienna, Austria","acronym":"PPoPP '18","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGHPC ACM Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3178487.3178491","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3178487.3178491","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3178487.3178491","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:39:07Z","timestamp":1750196347000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3178487.3178491"}},"subtitle":["dynamic GPU memory management for training deep neural 
networks"],"short-title":[],"issued":{"date-parts":[[2018,2,10]]},"references-count":28,"alternative-id":["10.1145\/3178487.3178491","10.1145\/3178487"],"URL":"https:\/\/doi.org\/10.1145\/3178487.3178491","relation":{"is-identical-to":[{"id-type":"doi","id":"10.1145\/3200691.3178491","asserted-by":"object"}]},"subject":[],"published":{"date-parts":[[2018,2,10]]},"assertion":[{"value":"2018-02-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}