{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T19:53:53Z","timestamp":1771703633103,"version":"3.50.1"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319691787","type":"print"},{"value":"9783319691794","type":"electronic"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-69179-4_2","type":"book-chapter","created":{"date-parts":[[2017,10,13]],"date-time":"2017-10-13T00:48:52Z","timestamp":1507855732000},"page":"18-32","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Distributed Training Large-Scale Deep Architectures"],"prefix":"10.1007","author":[{"given":"Shang-Xuan","family":"Zou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chun-Yen","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jui-Lin","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chun-Nan","family":"Chou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chia-Chin","family":"Tsao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kuan-Chieh","family":"Tung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ting-Wei","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cheng-Lung","family":"Sung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Edward Y.","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,10,14]]},"reference":[{"key":"2_CR1","unstructured":"Abadi, M. et al.: TensorFlow: large-scale machine learning on heterogeneous systems, 2015. Software available from tensorflow.org (2015)."},{"key":"2_CR2","doi-asserted-by":"crossref","unstructured":"Amdahl, G.M.: Validity of the single processor approach to achieving large scale computing capabilities. In: Proceedings of the Spring Joint Computer Conference, 18\u201320 April 1967, pp. 483\u2013485. ACM (1967)","DOI":"10.1145\/1465482.1465560"},{"key":"2_CR3","unstructured":"Bahrampour, S. et al.: Comparative study of deep learning software frameworks. In: arXiv.org. arxiv: 1511.06435v3 [cs.LG], November 2015"},{"key":"2_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1007\/978-3-642-35289-8_26","volume-title":"Neural Networks: Tricks of the Trade","author":"Y Bengio","year":"2012","unstructured":"Bengio, Y.: Practical recommendations for gradient-based training of deep architectures. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) Neural Networks: Tricks of the Trade. LNCS, vol. 7700, pp. 437\u2013478. Springer, Heidelberg (2012). doi:10.1007\/978-3-642-35289-8_26"},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Chang, E., Garcia-Molina, H., Li, C.: 2D BubbleUp: managing parallel disks for media servers. Technical report, Stanford InfoLab (1998)","DOI":"10.1145\/266180.266339"},{"key":"2_CR6","unstructured":"Chen, J. et al.: Revisiting distributed synchronous SGD. arXiv preprint arXiv:1604.00981 (2016)"},{"key":"2_CR7","unstructured":"Chen, T. et al.: MXNet: a flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274 (2015)"},{"key":"2_CR8","unstructured":"Chilimbi, T.M. et al.: Project adam: building an efficient and scalable deep learning training system. In: OSDI, vol. 14, pp. 571\u2013582 (2014)"},{"key":"2_CR9","unstructured":"Collobert, R., Kavukcuoglu, K., Farabet, C.: Torch7: a matlab-like environment for machine learning. In: EPFL-CONF-192376 (2011)"},{"key":"2_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1007\/978-3-319-11179-7_36","volume-title":"Artificial Neural Networks and Machine Learning \u2013 ICANN 2014","author":"J Cong","year":"2014","unstructured":"Cong, J., Xiao, B.: Minimizing computation in convolutional neural networks. In: Wermter, S., Weber, C., Duch, W., Honkela, T., Koprinkova-Hristova, P., Magg, S., Palm, G., Villa, A.E.P. (eds.) ICANN 2014. LNCS, vol. 8681, pp. 281\u2013290. Springer, Cham (2014). doi:10.1007\/978-3-319-11179-7_36"},{"key":"2_CR11","unstructured":"CS231n Convolutional neural network for visual recognition (2017). http:\/\/cs231n.github.io\/"},{"key":"2_CR12","unstructured":"Dally, W.J.: CNTK: an embedded language for circuit description. Department of Computer Science, California Institute of Technology, Display File"},{"key":"2_CR13","unstructured":"Dean, J. et al.: Large scale distributed deep networks, pp. 1223\u20131231 (2012)"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Deng, J. et al.: ImageNet: a large-scale hierarchical image database. In: CVPR 2009 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"12","key":"2_CR15","doi-asserted-by":"publisher","first-page":"960","DOI":"10.14778\/2994509.2994515","volume":"9","author":"A Elgohary","year":"2016","unstructured":"Elgohary, A., et al.: Compressed linear algebra for large-scale machine learning. Proc. VLDB Endow. 9(12), 960\u2013971 (2016)","journal-title":"Proc. VLDB Endow."},{"issue":"4","key":"2_CR16","doi-asserted-by":"publisher","first-page":"2341","DOI":"10.1137\/120880811","volume":"23","author":"S Ghadimi","year":"2013","unstructured":"Ghadimi, S., Lan, G.: Stochastic first-and zeroth-order methods for nonconvex stochastic programming. SIAM J. Optim. 23(4), 2341\u20132368 (2013)","journal-title":"SIAM J. Optim."},{"key":"2_CR17","unstructured":"GNU linear programming kit (2012). https:\/\/www.gnu.org\/software\/glpk\/"},{"key":"2_CR18","unstructured":"Goyal, P. et al.: Accurate, large Minibatch SGD: training ImageNet in 1\u00a0h. arXiv preprint arXiv:1706.02677 (2017)"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Hadjis, S., et al.: Caffe con troll: shallow ideas to speed up deep learning, April 2015. arXiv.org. arXiv: 1504.04343v2 [cs.LG]","DOI":"10.1145\/2799562.2799641"},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"He, K. et al.: Deep residual learning for image recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"1","key":"2_CR21","first-page":"926","volume":"9","author":"G Hinton","year":"2010","unstructured":"Hinton, G.: A practical guide to training restricted Boltzmann machines. Momentum 9(1), 926 (2010)","journal-title":"Momentum"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Iandola, F.N. et al.: FireCaffe - near-linear acceleration of deep neural network training on compute clusters. In: CVPR, pp. 2592\u20132600 (2016)","DOI":"10.1109\/CVPR.2016.284"},{"key":"2_CR23","unstructured":"Ioffe, S.: Batch renormalization: towards reducing Minibatch dependence in batch-normalized models, February 2017. arXiv.org. arXiv: 1702.03275v1 [cs.LG]"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Bergstra, J. et al.: Theano: a CPU and GPU math expression compiler (2010)","DOI":"10.25080\/Majora-92bf1922-003"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Jia, Y. et al.: Caffe: convolutional architecture for fast feature embedding. In: Proceedings of the 22nd ACM International Conference on Multimedia, pp. 675\u2013678 (2014)","DOI":"10.1145\/2647868.2654889"},{"key":"2_CR26","unstructured":"Krizhevsky, A.: One weird trick for parallelizing convolutional neural networks. arXiv preprint arXiv:1404.5997 (2014)"},{"key":"2_CR27","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: Pereira, F. et al. (eds.) Advances in Neural Information Processing Systems, vol. 25, pp. 1097\u20131105. Curran Associates Inc. (2012)"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Lavin, A., Gray, S.: Fast algorithms for convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4013\u20134021 (2016)","DOI":"10.1109\/CVPR.2016.435"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Li, M. et al.: Scaling distributed machine learning with the parameter server. In: OSDI (2014)","DOI":"10.1145\/2640087.2644155"},{"key":"2_CR30","doi-asserted-by":"publisher","unstructured":"Liu, Z. et al.: PLDA+: parallel latent Dirichlet allocation with data placement and pipeline processing. ACM Trans. Intell. Syst. Technol. 2(3), 26:1\u201326:18 (2011). ISSN, pp. 2157\u20136904, doi:10.1145\/1961189.1961198. http:\/\/doi.acm.org\/10.1145\/1961189.1961198","DOI":"10.1145\/1961189.1961198"},{"key":"2_CR31","unstructured":"Mathieu, M., Henaff, M., LeCun, Y.: Fast training of convolutional networks through FFTs. In: CoRR abs\/1312.5851 cs.CV (2013)"},{"key":"2_CR32","doi-asserted-by":"crossref","unstructured":"Nemhauser, G.L., Wolsey, L.A.: Integer programming and combinatorial optimization. In: Nemhauser, G.L., Savelsbergh, M.W.P., Sigismondi, G.S. (eds.) Constraint Classification for Mixed Integer Programming Formulations. Wiley, Chichester (1992). COAL Bull. 20, 8\u201312 (1988)","DOI":"10.1002\/9781118627372.ch2"},{"key":"2_CR33","unstructured":"Ng, A.Y.: The nuts and bolts of machine learning. In: NIPS Workshop on Deep Learning and Unsupervised Feature Learning (2016)"},{"key":"2_CR34","unstructured":"Niu, F. et al.: A lock-free approach to parallelizing stochastic gradient descent. arXiv preprint arXiv:1106.5730 (2011)"},{"key":"2_CR35","doi-asserted-by":"crossref","unstructured":"Shi, S. et al.: Benchmarking state-of-the-art deep learning software tools, August 2016. arXiv.org. arXiv:1608.07249v5 [cs.DC]","DOI":"10.1109\/CCBD.2016.029"},{"key":"2_CR36","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Ioffe, S., Vanhoucke, V.: Inception-v4, Inception-ResNet and the impact of residual connections on learning. In: CoRR abs\/1602.07261 (2016). http:\/\/arxiv.org\/abs\/1602.07261","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"2_CR37","unstructured":"Vasilache, N. et al.: Fast convolutional nets with fbfft: a GPU performance evaluation. arXiv preprint arXiv:1412.7580 (2014)"},{"key":"2_CR38","unstructured":"Zhang, H. et al.: Poseidon: a system architecture for effcient GPU-based deep learning on multiple machines. arXiv preprint arXiv:1512.06216 (2015)"},{"key":"2_CR39","unstructured":"Zheng, Z. et al.: SpeeDO: parallelizing stochastic gradient descent for deep convolutional neural network. In: NIPS Workshop on Learning Systems (2015)"},{"key":"2_CR40","unstructured":"Zinkevich, M. et al.: Parallelized stochastic gradient descent, pp. 2595\u20132603 (2010)"},{"key":"2_CR41","unstructured":"Zou, S.-X. et al.: Distributed training large-scale deep architectures. HTC technical report (2017). https:\/\/research.htc.com\/publications-and-talks"}],"container-title":["Lecture Notes in Computer Science","Advanced Data Mining and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-69179-4_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T13:28:07Z","timestamp":1709818087000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-69179-4_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319691787","9783319691794"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-69179-4_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"14 October 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ADMA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Advanced Data Mining and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 November 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 November 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"adma2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}