{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T05:20:58Z","timestamp":1751779258161,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,8,5]],"date-time":"2019-08-05T00:00:00Z","timestamp":1564963200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,8,5]]},"DOI":"10.1145\/3337821.3337905","type":"proceedings-article","created":{"date-parts":[[2019,7,25]],"date-time":"2019-07-25T12:34:36Z","timestamp":1564058076000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["Performance, Energy, and Scalability Analysis and Improvement of Parallel Cancer Deep Learning CANDLE Benchmarks"],"prefix":"10.1145","author":[{"given":"Xingfu","family":"Wu","sequence":"first","affiliation":[{"name":"Argonne National Laboratory, University of Chicago"}]},{"given":"Valerie","family":"Taylor","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, University of Chicago"}]},{"given":"Justin M.","family":"Wozniak","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, University of Chicago"}]},{"given":"Rick","family":"Stevens","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, University of Chicago"}]},{"given":"Thomas","family":"Brettin","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, University of Chicago"}]},{"given":"Fangfang","family":"Xia","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, University of Chicago"}]}],"member":"320","published-online":{"date-parts":[[2019,8,5]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems, arXiv:1603.04467","author":"Abadi M.","year":"2016","unstructured":"M. Abadi , A. Agarwal , P. Barham , E. Brevdo , Z. Chen , C. Citro , G. S. Corrado , A. Davis , J. Dean , M. Devin , S. Ghemawat , I. Goodfellow , A. Harp , G. Irving , M. Isard , Y. Jia , R. Jozefowicz , L. Kaiser , M. Kudlur , J. Levenberg , D. Mane , R. Monga , S. Moore , D. Murray , C. Olah , M. Schuster , J. Shlens , B. Steiner , I. Sutskever , K. Talwar , P. Tucker , V. Vanhoucke , V. Vasudevan , F. Viegas , O. Vinyals , P. Warden , M. Wattenberg , M. Wicke , Y. Yu , and X. Zheng , TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems, arXiv:1603.04467 , 2016 . M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen, C. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, S. Ghemawat, I. Goodfellow, A. Harp, G. Irving, M. Isard, Y. Jia, R. Jozefowicz, L. Kaiser, M. Kudlur, J. Levenberg, D. Mane, R. Monga, S. Moore, D. Murray, C. Olah, M. Schuster, J. Shlens, B. Steiner, I. Sutskever, K. Talwar, P. Tucker, V. Vanhoucke, V. Vasudevan, F. Viegas, O. Vinyals, P. Warden, M. Wattenberg, M. Wicke, Y. Yu, and X. Zheng, TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems, arXiv:1603.04467, 2016."},{"key":"e_1_3_2_1_2_1","volume-title":"TensorFlow: A System for Large-Scale Machine Learning, arXiv:1605.08695","author":"Abadi M.","year":"2016","unstructured":"M. Abadi , P. Barham , J. Chen , Z. Chen , A. Davis , J. Dean , M. Devin , S. Ghemawat , G. Irving , M. Isard , M. Kudlur , J. Levenberg , R. Monga , S. Moore , D. G. Murray , B. Steiner , P. Tucker , V. Vasudevan , P. Warden , M. Wicke , Y. Yu , and X. Zheng , TensorFlow: A System for Large-Scale Machine Learning, arXiv:1605.08695 , 2016 . M. Abadi, P. Barham, J. Chen, Z. Chen, A. Davis, J. Dean, M. Devin, S. Ghemawat, G. Irving, M. Isard, M. Kudlur, J. Levenberg, R. Monga, S. Moore, D. G. Murray, B. Steiner, P. Tucker, V. Vasudevan, P. Warden, M. Wicke, Y. Yu, and X. Zheng, TensorFlow: A System for Large-Scale Machine Learning, arXiv:1605.08695, 2016."},{"key":"e_1_3_2_1_3_1","volume-title":"Summit Training Workshop","author":"Atchley S.","year":"2018","unstructured":"S. Atchley , Summit Architecture Overview , Summit Training Workshop , Dec. 2018 . S. Atchley, Summit Architecture Overview, Summit Training Workshop, Dec. 2018."},{"key":"e_1_3_2_1_4_1","unstructured":"Baidu-allreduce https:\/\/github.com\/baidu-research\/baidu-allreduce.  Baidu-allreduce https:\/\/github.com\/baidu-research\/baidu-allreduce."},{"key":"e_1_3_2_1_5_1","unstructured":"CANDLE\n  : Cancer Distributed Learning Environment http:\/\/candle.cels.anl.gov.  CANDLE: Cancer Distributed Learning Environment http:\/\/candle.cels.anl.gov."},{"key":"e_1_3_2_1_6_1","volume-title":"Sep.","author":"Ben-Nun T.","year":"2018","unstructured":"T. Ben-Nun and T. Hoefler , Demystifying Parallel and Distributed Deep Learning: An In-Depth Concurrency Analysis, https:\/\/www.arxiv.org\/abs\/1802.09941 , Sep. 2018 . T. Ben-Nun and T. Hoefler, Demystifying Parallel and Distributed Deep Learning: An In-Depth Concurrency Analysis, https:\/\/www.arxiv.org\/abs\/1802.09941, Sep. 2018."},{"key":"e_1_3_2_1_7_1","volume-title":"https:\/\/github.com\/ECP-CANDLE\/Benchmarks (Accessed","author":"Benchmarks CANDLE","year":"2018","unstructured":"CANDLE Benchmarks : https:\/\/github.com\/ECP-CANDLE\/Benchmarks (Accessed in Dec. 2018 ). CANDLE Benchmarks: https:\/\/github.com\/ECP-CANDLE\/Benchmarks (Accessed in Dec. 2018)."},{"key":"e_1_3_2_1_8_1","unstructured":"CANDLENT3 https:\/\/github.com\/ECP-CANDLE\/Benchmarks\/tree\/master\/Pilot1\/NT3.  CANDLENT3 https:\/\/github.com\/ECP-CANDLE\/Benchmarks\/tree\/master\/Pilot1\/NT3."},{"key":"e_1_3_2_1_9_1","unstructured":"Chrome trace event profiling tool https:\/\/www.chromium.org\/developers\/howtos\/trace-event-profiling-tool.  Chrome trace event profiling tool https:\/\/www.chromium.org\/developers\/howtos\/trace-event-profiling-tool."},{"key":"e_1_3_2_1_11_1","unstructured":"Cray XC40 Theta Argonne National Laboratory https:\/\/www.alcf.anl.gov\/theta.  Cray XC40 Theta Argonne National Laboratory https:\/\/www.alcf.anl.gov\/theta."},{"key":"e_1_3_2_1_12_1","unstructured":"Dask DataFrame https:\/\/docs.dask.org\/en\/latest\/dataframe.html.  Dask DataFrame https:\/\/docs.dask.org\/en\/latest\/dataframe.html."},{"key":"e_1_3_2_1_13_1","unstructured":"Distributed TensorFlow https:\/\/www.tensorflow.org\/deploy\/distributed https:\/\/github.com\/tensorflow\/tensorflow\/blob\/master\/tensorflow\/core\/distributed_runtime\/README.md.  Distributed TensorFlow https:\/\/www.tensorflow.org\/deploy\/distributed https:\/\/github.com\/tensorflow\/tensorflow\/blob\/master\/tensorflow\/core\/distributed_runtime\/README.md."},{"key":"e_1_3_2_1_14_1","unstructured":"Horovod: A Distributed Training Framework for TensorFlow https:\/\/github.com\/uber\/horovod.  Horovod: A Distributed Training Framework for TensorFlow https:\/\/github.com\/uber\/horovod."},{"key":"e_1_3_2_1_15_1","unstructured":"jsrun visualizer https:\/\/jsrunvisualizer.olcf.ornl.gov  jsrun visualizer https:\/\/jsrunvisualizer.olcf.ornl.gov"},{"key":"e_1_3_2_1_16_1","unstructured":"Keras: The Python Deep Learning Library https:\/\/keras.io\/#keras-the-python-deep-learning-library.  Keras: The Python Deep Learning Library https:\/\/keras.io\/#keras-the-python-deep-learning-library."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3149412.3149419"},{"key":"e_1_3_2_1_18_1","volume-title":"Summit Training Workshop","author":"Markomanolis G. S.","year":"2018","unstructured":"G. S. Markomanolis , Spectrum Scale (GPFS) , Summit Training Workshop , 2018 . G. S. Markomanolis, Spectrum Scale (GPFS), Summit Training Workshop, 2018."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the Cray User Group (CUG)","author":"Martin S.","year":"2016","unstructured":"S. Martin , D. Rush , M. Kappel , M. Sandstedt , and J. Williams . 2016. Cray XC40 Power Monitoring and Control for Knights Landing . Proceedings of the Cray User Group (CUG) , 2016 . S. Martin, D. Rush, M. Kappel, M. Sandstedt, and J. Williams. 2016. Cray XC40 Power Monitoring and Control for Knights Landing. Proceedings of the Cray User Group (CUG), 2016."},{"key":"e_1_3_2_1_20_1","volume-title":"An Empirical Model of Large-Batch Training, arXiv:1812.06162v1","author":"McCandlish S.","year":"2018","unstructured":"S. McCandlish , J. Kaplan , and D. Amodei , An Empirical Model of Large-Batch Training, arXiv:1812.06162v1 , 14 Dec 2018 . S. McCandlish, J. Kaplan, and D. Amodei, An Empirical Model of Large-Batch Training, arXiv:1812.06162v1, 14 Dec 2018."},{"key":"e_1_3_2_1_21_1","volume-title":"ALCF SDL Workshop","author":"Mendygral P.","year":"2018","unstructured":"P. Mendygral , Scaling Deep Learning , ALCF SDL Workshop , March 2018 . P. Mendygral, Scaling Deep Learning, ALCF SDL Workshop, March 2018."},{"key":"e_1_3_2_1_22_1","unstructured":"Microsoft Cognitive Toolkit https:\/\/github.com\/Microsoft\/cntk.  Microsoft Cognitive Toolkit https:\/\/github.com\/Microsoft\/cntk."},{"key":"e_1_3_2_1_23_1","unstructured":"NVIDIA Collective Communications Library (NCCL 2.3.7) https:\/\/developer.nvidia.com\/nccl.  NVIDIA Collective Communications Library (NCCL 2.3.7) https:\/\/developer.nvidia.com\/nccl."},{"key":"e_1_3_2_1_24_1","volume-title":"https:\/\/developer.nvidia.com\/nvidia-system-management-interface","author":"System Management NVIDIA","year":"2018","unstructured":"NVIDIA System Management Interface (nvidia-smi) , https:\/\/developer.nvidia.com\/nvidia-system-management-interface , 2018 . NVIDIA System Management Interface (nvidia-smi), https:\/\/developer.nvidia.com\/nvidia-system-management-interface, 2018."},{"key":"e_1_3_2_1_25_1","unstructured":"NVProf https:\/\/docs.nvidia.com\/cuda\/profiler-users-guide\/index.html.  NVProf https:\/\/docs.nvidia.com\/cuda\/profiler-users-guide\/index.html."},{"key":"e_1_3_2_1_26_1","unstructured":"Pandas https:\/\/pandas.pydata.org\/pandas-docs\/stable\/.  Pandas https:\/\/pandas.pydata.org\/pandas-docs\/stable\/."},{"key":"e_1_3_2_1_27_1","unstructured":"Python Profilers https:\/\/docs.python.org\/2\/library\/profile.html.  Python Profilers https:\/\/docs.python.org\/2\/library\/profile.html."},{"key":"e_1_3_2_1_28_1","volume-title":"Del Balso","author":"Sergeev A.","year":"1802","unstructured":"A. Sergeev and M. Del Balso , Horovod : Fast and Easy Distributed Deep Learning in TensorFlow , arXiv: 1802 .05799v3, Feb. 21, 2018. A. Sergeev and M. Del Balso, Horovod: Fast and Easy Distributed Deep Learning in TensorFlow, arXiv:1802.05799v3, Feb. 21, 2018."},{"key":"e_1_3_2_1_29_1","unstructured":"Summit https:\/\/www.olcf.ornl.gov\/olcf-resources\/compute-systems\/summit\/  Summit https:\/\/www.olcf.ornl.gov\/olcf-resources\/compute-systems\/summit\/"},{"key":"e_1_3_2_1_30_1","unstructured":"TensorFlow https:\/\/www.tensorflow.org.  TensorFlow https:\/\/www.tensorflow.org."},{"key":"e_1_3_2_1_31_1","unstructured":"TensorFlow Benchmarks https:\/\/www.tensorflow.org\/performance\/benchmarks.  TensorFlow Benchmarks https:\/\/www.tensorflow.org\/performance\/benchmarks."},{"key":"e_1_3_2_1_32_1","unstructured":"Theano https:\/\/github.com\/Theano\/Theano.  Theano https:\/\/github.com\/Theano\/Theano."},{"volume-title":"BMC Bioinformatics","author":"Wozniak J. M.","key":"e_1_3_2_1_33_1","unstructured":"J. M. Wozniak , R. Jain , P. Balaprakash , J. Ozik , N. Collier , J. Bauer , F. Xia , T. Brettin , R. Stevens , J. Mohd-Yusof , C. G. Cardona , B. Van Essen , and M. Baughman , CANDLE\/Supervisor: A Workflow Framework for Machine Learning Applied to Cancer Research , BMC Bioinformatics , Vol. 19 , No. 18, 2018. J. M. Wozniak, R. Jain, P. Balaprakash, J. Ozik, N. Collier, J. Bauer, F. Xia, T. Brettin, R. Stevens, J. Mohd-Yusof, C. G. Cardona, B. Van Essen, and M. Baughman, CANDLE\/Supervisor: A Workflow Framework for Machine Learning Applied to Cancer Research, BMC Bioinformatics, Vol. 19, No. 18, 2018."},{"issue":"10","key":"e_1_3_2_1_34_1","first-page":"20","volume":"49","author":"Wu X.","year":"2016","unstructured":"X. Wu , V. Taylor , J. Cook , and P. Mucci , Using Performance-Power Modeling to Improve Energy Efficiency of HPC Applications, IEEE Computer , Vol. 49 , No. 10 , pp. 20 -- 29 , Oct. 2016 . X. Wu, V. Taylor, J. Cook, and P. Mucci, Using Performance-Power Modeling to Improve Energy Efficiency of HPC Applications, IEEE Computer, Vol. 49, No. 10, pp. 20--29, Oct. 2016.","journal-title":"Using Performance-Power Modeling to Improve Energy Efficiency of HPC Applications, IEEE Computer"},{"key":"e_1_3_2_1_35_1","volume-title":"SC18 Workshop on Python for High-Performance and Scientific Computing","author":"Wu X.","year":"2018","unstructured":"X. Wu , V. Taylor , J. M. Wozniak , R. Stevens , T. Brettin and F. Xia , Performance, Power, and Scalability Analysis of the Horovod Implementation of the CANDLE NT3 Benchmark on the Cray XC40 Theta , SC18 Workshop on Python for High-Performance and Scientific Computing , Dallas, USA , Nov. 12, 2018 . X. Wu, V. Taylor, J. M. Wozniak, R. Stevens, T. Brettin and F. Xia, Performance, Power, and Scalability Analysis of the Horovod Implementation of the CANDLE NT3 Benchmark on the Cray XC40 Theta, SC18 Workshop on Python for High-Performance and Scientific Computing, Dallas, USA, Nov. 12, 2018."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3225058.3225069"}],"event":{"name":"ICPP 2019: 48th International Conference on Parallel Processing","sponsor":["University of Tsukuba University of Tsukuba"],"location":"Kyoto Japan","acronym":"ICPP 2019"},"container-title":["Proceedings of the 48th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3337821.3337905","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3337821.3337905","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:25:42Z","timestamp":1750206342000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3337821.3337905"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,8,5]]},"references-count":35,"alternative-id":["10.1145\/3337821.3337905","10.1145\/3337821"],"URL":"https:\/\/doi.org\/10.1145\/3337821.3337905","relation":{},"subject":[],"published":{"date-parts":[[2019,8,5]]},"assertion":[{"value":"2019-08-05","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}