{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T09:41:40Z","timestamp":1775122900422,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,1,26]],"date-time":"2017-01-26T00:00:00Z","timestamp":1485388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,1,26]]},"DOI":"10.1145\/3018743.3018769","type":"proceedings-article","created":{"date-parts":[[2017,1,27]],"date-time":"2017-01-27T13:41:04Z","timestamp":1485524464000},"page":"193-205","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":80,"title":["S-Caffe"],"prefix":"10.1145","author":[{"given":"Ammar Ahmad","family":"Awan","sequence":"first","affiliation":[{"name":"The Ohio State University, Columbus, OH, USA"}]},{"given":"Khaled","family":"Hamidouche","sequence":"additional","affiliation":[{"name":"The Ohio State University, Columbus, OH, USA"}]},{"given":"Jahanzeb Maqbool","family":"Hashmi","sequence":"additional","affiliation":[{"name":"The Ohio State University, Columbus, OH, USA"}]},{"given":"Dhabaleswar K.","family":"Panda","sequence":"additional","affiliation":[{"name":"The Ohio State University, Columbus, OH, USA"}]}],"member":"320","published-online":{"date-parts":[[2017,1,26]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Caffe: Multi-GPU Usage and Performance. https:\/\/github.com\/yahoo\/caffe\/blob\/master\/docs\/multigpu.md.  Caffe: Multi-GPU Usage and Performance. https:\/\/github.com\/yahoo\/caffe\/blob\/master\/docs\/multigpu.md."},{"key":"e_1_3_2_1_2_1","unstructured":"KESCH\n  : Cray CS-Storm System. http:\/\/www.cscs.ch\/computers\/kesch_escha\/index.html.  KESCH: Cray CS-Storm System. http:\/\/www.cscs.ch\/computers\/kesch_escha\/index.html."},{"key":"e_1_3_2_1_3_1","unstructured":"Intel Caffe. https:\/\/github.com\/intelcaffe.  Intel Caffe. https:\/\/github.com\/intelcaffe."},{"key":"e_1_3_2_1_4_1","unstructured":"A Unified Runtime System for Heterogeneous Multicore Architectures. http:\/\/starpu.gforge.inria.fr.  A Unified Runtime System for Heterogeneous Multicore Architectures. http:\/\/starpu.gforge.inria.fr."},{"key":"e_1_3_2_1_5_1","volume-title":"http:\/\/image-net.org\/challenges\/LSVRC\/2012\/index","year":"2012"},{"key":"e_1_3_2_1_6_1","volume-title":"http:\/\/caffe.berkeleyvision.org\/","author":"Website Caffe","year":"2015"},{"key":"e_1_3_2_1_7_1","volume-title":"http:\/\/papers.nips.cc\/book\/advances-in-neural-information-processing-systems-25--2012","year":"2015"},{"key":"e_1_3_2_1_8_1","volume-title":"http:\/\/docs.nvidia.com\/cuda\/gpudirect-rdma\/","author":"GPU","year":"2015"},{"key":"e_1_3_2_1_9_1","unstructured":"HPC\n  : Powering Deep Learning. http:\/\/computing.ornl.gov\/workshops\/SMC15\/docs\/bcatanzaro_smcc.pdf 2015. [Online; accessed Dec-2016].  HPC: Powering Deep Learning. http:\/\/computing.ornl.gov\/workshops\/SMC15\/docs\/bcatanzaro_smcc.pdf 2015. [Online; accessed Dec-2016]."},{"key":"e_1_3_2_1_10_1","volume-title":"http:\/\/symas.com\/mdb\/","author":"LMDB.","year":"2015"},{"key":"e_1_3_2_1_11_1","volume-title":"http:\/\/www.nvidia.com\/object\/drive-px.html","author":"Autonomous Cars Nvidia Development","year":"2016"},{"key":"e_1_3_2_1_12_1","volume-title":"http:\/\/www.cntk.ai\/","author":"CNTK.","year":"2016"},{"key":"e_1_3_2_1_13_1","volume-title":"http:\/\/www.extremetech.com\/computing\/194391-nvidias-new-tesla-k80-doubles-up-on-gpu-horsepower","author":"Us Comparison Nvidia","year":"2016"},{"key":"e_1_3_2_1_14_1","volume-title":"TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems","author":"Abadi M.","year":"2015"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcp.2008.01.047"},{"key":"e_1_3_2_1_16_1","volume-title":"Neon, Theano, and Torch for Deep Learning. CoRR, abs\/1511.06435","author":"Bahrampour S.","year":"2016"},{"key":"e_1_3_2_1_17_1","volume-title":"Theano: New Features and Speed Improvements. arXiv preprint arXiv:1211.5590","author":"Bastien F.","year":"2012"},{"key":"e_1_3_2_1_18_1","volume-title":"AMBER 2015","author":"Case D.","year":"2015"},{"key":"e_1_3_2_1_19_1","first-page":"571","volume-title":"Proceedings of the 11th USENIX Conference on Operating Systems Design and Implementation, OSDI'14","author":"Chilimbi T.","year":"2014"},{"key":"e_1_3_2_1_20_1","first-page":"1337","volume-title":"Proceedings of the 30th international conference on machine learning","author":"Coates A.","year":"2013"},{"key":"e_1_3_2_1_21_1","volume-title":"IDIAP","author":"Collobert R.","year":"2002"},{"key":"e_1_3_2_1_22_1","volume-title":"http:\/\/docs.cray.com\/books\/004--3689-001\/html-004--3689-001\/004--3689-001-toc.html","year":"2016"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2901318.2901323"},{"key":"e_1_3_2_1_24_1","first-page":"1223","volume-title":"Advances in Neural Information Processing Systems","author":"Dean J.","year":"2012"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_26_1","unstructured":"Google. Google's Remote Procedure Call Library (gRPC). http:\/\/www.grpc.io natexlaba.  Google. Google's Remote Procedure Call Library (gRPC). http:\/\/www.grpc.io natexlaba."},{"key":"e_1_3_2_1_27_1","unstructured":"Google. Distributed TensorFlow: Github Issues. https:\/\/github.com\/tensorflow\/models\/issues\/698 natexlabb.  Google. Distributed TensorFlow: Github Issues. https:\/\/github.com\/tensorflow\/models\/issues\/698 natexlabb."},{"key":"e_1_3_2_1_28_1","first-page":"1","volume-title":"Workshops and Phd Forum (IPDPSW), 2010 IEEE International Symposium on","author":"Graham R. L.","year":"2010"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1362622.1362692"},{"key":"e_1_3_2_1_30_1","volume-title":"FireCaffe: Near-Linear Acceleration of Deep Neural Network Training on Compute Clusters. arXiv preprint arXiv:1511.00175","author":"Iandola F. N.","year":"2015"},{"key":"e_1_3_2_1_31_1","volume-title":"https:\/\/github.com\/Caffe-MPI\/Caffe-MPI.github.io","year":"2016"},{"key":"e_1_3_2_1_32_1","unstructured":"J. Dean. Keynote: Large Scale Deep Learning.  J. Dean. Keynote: Large Scale Deep Learning."},{"key":"e_1_3_2_1_33_1","volume-title":"Caffe: Convolutional Architecture for Fast Feature Embedding. arXiv preprint arXiv:1408.5093","author":"Jia Y.","year":"2014"},{"key":"e_1_3_2_1_34_1","volume-title":"abs\/1404.5997","author":"Krizhevsky A.","year":"2014"},{"key":"e_1_3_2_1_35_1","volume-title":"Learning Multiple Layers of Features from Tiny Images","author":"Krizhevsky A.","year":"2009"},{"key":"e_1_3_2_1_36_1","first-page":"1097","volume-title":"Advances in Neural Information Processing Systems 25","author":"Krizhevsky A.","year":"2012"},{"key":"e_1_3_2_1_37_1","volume-title":"Why M Heads are Better than One: Training a Diverse Ensemble of Deep Networks. arXiv","author":"Lee S.","year":"2015"},{"key":"e_1_3_2_1_38_1","volume-title":"Network in Network. arXiv preprint arXiv:1312.4400","author":"Lin M.","year":"2013"},{"key":"e_1_3_2_1_39_1","unstructured":"Lustre. Parallel File System. http:\/\/lustre.org.  Lustre. Parallel File System. http:\/\/lustre.org."},{"key":"e_1_3_2_1_40_1","unstructured":"H. Meuer E. Strohmaier J. Dongarra and H. Simon. TOP 500 Supercomputer Sites. http:\/\/www.top500.org.  H. Meuer E. Strohmaier J. Dongarra and H. Simon. TOP 500 Supercomputer Sites. http:\/\/www.top500.org."},{"key":"e_1_3_2_1_41_1","unstructured":"MPI over InfiniBand 10GigE\/iWARP and RoCE. https:\/\/mvapich.cse.ohio-state.edu\/.  MPI over InfiniBand 10GigE\/iWARP and RoCE. https:\/\/mvapich.cse.ohio-state.edu\/."},{"key":"e_1_3_2_1_42_1","volume-title":"http:\/\/mvapich.cse.ohio-state.edu\/benchmarks\/","author":"Computing Laboratory Network Based","year":"2016"},{"key":"e_1_3_2_1_43_1","unstructured":"C. Nvidia. Programming Guide 2008.  C. Nvidia. Programming Guide 2008."},{"key":"e_1_3_2_1_44_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan K.","year":"2014"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_46_1","unstructured":"The HiDL Team. High Performance Deep Learning (HiDL) Project. http:\/\/hidl.cse.ohio-state.edu.  The HiDL Team. High Performance Deep Learning (HiDL) Project. http:\/\/hidl.cse.ohio-state.edu."},{"key":"e_1_3_2_1_47_1","unstructured":"The Open MPI Development Team. Open MPI : Open Source High Performance Computing. http:\/\/www.open-mpi.org.  The Open MPI Development Team. Open MPI : Open Source High Performance Computing. http:\/\/www.open-mpi.org."},{"key":"e_1_3_2_1_48_1","volume-title":"Distributed TensorFlow with MPI. arXiv preprint arXiv:1603.02339","author":"Vishnu A.","year":"2016"},{"key":"e_1_3_2_1_49_1","volume-title":"June","author":"Wang D.","year":"2016"}],"event":{"name":"PPoPP '17: 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","location":"Austin Texas USA","acronym":"PPoPP '17","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages"]},"container-title":["Proceedings of the 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3018743.3018769","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3018743.3018769","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:24:10Z","timestamp":1750206250000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3018743.3018769"}},"subtitle":["Co-designing MPI Runtimes and Caffe for Scalable Deep Learning on Modern GPU Clusters"],"short-title":[],"issued":{"date-parts":[[2017,1,26]]},"references-count":49,"alternative-id":["10.1145\/3018743.3018769","10.1145\/3018743"],"URL":"https:\/\/doi.org\/10.1145\/3018743.3018769","relation":{"is-identical-to":[{"id-type":"doi","id":"10.1145\/3155284.3018769","asserted-by":"object"}]},"subject":[],"published":{"date-parts":[[2017,1,26]]},"assertion":[{"value":"2017-01-26","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}