{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T02:05:23Z","timestamp":1776996323764,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,7,17]],"date-time":"2021-07-17T00:00:00Z","timestamp":1626480000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"XRAC","award":["NCR-130002"],"award-info":[{"award-number":["NCR-130002"]}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["#1818253, #1854828, #1931537, #2007991, #2018627"],"award-info":[{"award-number":["#1818253, #1854828, #1931537, #2007991, #2018627"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,7,17]]},"DOI":"10.1145\/3437359.3465582","type":"proceedings-article","created":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T04:08:46Z","timestamp":1626581326000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["INAM: Cross-stack Profiling and Analysis of Communication in MPI-based Applications"],"prefix":"10.1145","author":[{"given":"Pouya","family":"Kousha","sequence":"first","affiliation":[{"name":"The Ohio State University, USA"}]},{"given":"Kamal Raj","family":"Sankarapandian Dayala Ganesh Ram","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}]},{"given":"Mansa","family":"Kedia","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}]},{"given":"Hari","family":"Subramoni","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}]},{"given":"Arpan","family":"Jain","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}]},{"given":"Aamir","family":"Shafi","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}]},{"given":"Dhabaleswar","family":"Panda","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}]},{"given":"Trey","family":"Dockendorf","sequence":"additional","affiliation":[{"name":"Ohio Supercomputer Center, USA"}]},{"given":"Heechang","family":"Na","sequence":"additional","affiliation":[{"name":"Ohio Supercomputer Center, USA"}]},{"given":"Karen","family":"Tomko","sequence":"additional","affiliation":[{"name":"Ohio Supercomputer Center, USA"}]}],"member":"320","published-online":{"date-parts":[[2021,7,17]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. Horovod: Distributed training framework for TensorFlow. https:\/\/github.com\/uber\/horovod.  [n.d.]. Horovod: Distributed training framework for TensorFlow. https:\/\/github.com\/uber\/horovod."},{"key":"e_1_3_2_1_2_1","unstructured":"[n.d.]. Integrated Performance Monitoring (IPM). http:\/\/ipm-hpc.sourceforge.net\/.  [n.d.]. Integrated Performance Monitoring (IPM). http:\/\/ipm-hpc.sourceforge.net\/."},{"key":"e_1_3_2_1_3_1","unstructured":"[n.d.]. mpiP: Lightweight Scalable MPI Profiling. http:\/\/www.llnl.gov\/CASC\/mpip\/.  [n.d.]. mpiP: Lightweight Scalable MPI Profiling. http:\/\/www.llnl.gov\/CASC\/mpip\/."},{"key":"e_1_3_2_1_4_1","unstructured":"[n.d.]. Performance Co-Pilot. https:\/\/pcp.io.  [n.d.]. Performance Co-Pilot. https:\/\/pcp.io."},{"key":"e_1_3_2_1_5_1","unstructured":"[n.d.]. Prometheus exporter. https:\/\/github.com\/prometheus\/node_exporter.  [n.d.]. Prometheus exporter. https:\/\/github.com\/prometheus\/node_exporter."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"P. Kousha S. D. Kamal Raj H. Subramoni D. Panda H. Na T. Dockendorf K. Tomko. 2020. Accelerated Real-time Network Monitoring and Profiling at Scale using OSU INAM. In Practice and Experience in Advanced Research Computing (PEARC 2020) (Portland Oregon USA).  P. Kousha S. D. Kamal Raj H. Subramoni D. Panda H. Na T. Dockendorf K. Tomko. 2020. Accelerated Real-time Network Monitoring and Profiling at Scale using OSU INAM. In Practice and Experience in Advanced Research Computing (PEARC 2020) (Portland Oregon USA).","DOI":"10.1145\/3311790.3396672"},{"key":"e_1_3_2_1_7_1","volume-title":"The Lightweight Distributed Metric Service: A Scalable Infrastructure for Continuous Monitoring of Large Scale Computing Systems and Applications(SC \u201914)","author":"Agelastos Anthony","year":"2014","unstructured":"Anthony Agelastos , Benjamin Allan , Jim Brandt , Paul Cassella , Jeremy Enos , Joshi Fullop , Ann Gentile , Steve Monk , Nichamon Naksinehaboon , Jeff Ogden , Mahesh Rajan , Michael Showerman , Joel Stevenson , Narate Taerat , and Tom Tucker . 2014. The Lightweight Distributed Metric Service: A Scalable Infrastructure for Continuous Monitoring of Large Scale Computing Systems and Applications(SC \u201914) . IEEE Press , Piscataway, NJ, USA , 154\u2013165. https:\/\/doi.org\/10.1109\/SC. 2014 .18 Anthony Agelastos, Benjamin Allan, Jim Brandt, Paul Cassella, Jeremy Enos, Joshi Fullop, Ann Gentile, Steve Monk, Nichamon Naksinehaboon, Jeff Ogden, Mahesh Rajan, Michael Showerman, Joel Stevenson, Narate Taerat, and Tom Tucker. 2014. The Lightweight Distributed Metric Service: A Scalable Infrastructure for Continuous Monitoring of Large Scale Computing Systems and Applications(SC \u201914). IEEE Press, Piscataway, NJ, USA, 154\u2013165. https:\/\/doi.org\/10.1109\/SC.2014.18"},{"key":"e_1_3_2_1_8_1","unstructured":"Apache Foundation. [n.d.]. MxNet: A Flexible and Effcient Library for Deep Learning. https:\/\/mxnet.apache.org\/.  Apache Foundation. [n.d.]. MxNet: A Flexible and Effcient Library for Deep Learning. https:\/\/mxnet.apache.org\/."},{"key":"e_1_3_2_1_9_1","unstructured":"ARM Holdings. [n.d.]. ARM MAP. https:\/\/www.arm.com\/products\/development-tools\/server-and-hpc\/forge\/map.  ARM Holdings. [n.d.]. ARM MAP. https:\/\/www.arm.com\/products\/development-tools\/server-and-hpc\/forge\/map."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2019.00064"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2019.2949986"},{"key":"e_1_3_2_1_12_1","unstructured":"B. Barth T. Evans and J. McCalpin. [n.d.]. TACC STATS. https:\/\/www.tacc.utexas.edu\/research-development\/tacc-projects\/tacc-stats.  B. Barth T. Evans and J. McCalpin. [n.d.]. TACC STATS. https:\/\/www.tacc.utexas.edu\/research-development\/tacc-projects\/tacc-stats."},{"key":"e_1_3_2_1_13_1","unstructured":"Barcelona Supercomputing Center. [n.d.]. Paraver. http:\/\/www.bsc.es\/computer-sciences\/performance-tools\/paraver.  Barcelona Supercomputing Center. [n.d.]. Paraver. http:\/\/www.bsc.es\/computer-sciences\/performance-tools\/paraver."},{"key":"e_1_3_2_1_14_1","unstructured":"CaRCC [n.d.]. CaRCC - Campus Research Computing Consortium. https:\/\/carcc.org\/.  CaRCC [n.d.]. CaRCC - Campus Research Computing Consortium. https:\/\/carcc.org\/."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343211.3343215"},{"key":"e_1_3_2_1_16_1","unstructured":"HorovodRunner [n.d.]. HorovodRunner: distributed deep learning with Horovod. https:\/\/docs.databricks.com\/applications\/machine-learning\/train-model\/distributed-training\/horovod-runner.html.  HorovodRunner [n.d.]. HorovodRunner: distributed deep learning with Horovod. https:\/\/docs.databricks.com\/applications\/machine-learning\/train-model\/distributed-training\/horovod-runner.html."},{"key":"e_1_3_2_1_17_1","unstructured":"[\n  17\n  ]  HPCToolkit.2019. http:\/\/hpctoolkit.org\/Accessed: 2021\/06\/11 14:07:46.  [17] HPCToolkit.2019. http:\/\/hpctoolkit.org\/Accessed: 2021\/06\/11 14:07:46."},{"key":"e_1_3_2_1_18_1","unstructured":"Intel. [n.d.]. Intel Trace Analyzer and Collector. https:\/\/software.intel.com\/en-us\/trace-analyzer.  Intel. [n.d.]. Intel Trace Analyzer and Collector. https:\/\/software.intel.com\/en-us\/trace-analyzer."},{"key":"e_1_3_2_1_19_1","unstructured":"Intel Corporation. [n.d.]. Intel VTune Amplifier. https:\/\/software.intel.com\/en-us\/intel-vtune-amplifier-xe.  Intel Corporation. [n.d.]. Intel VTune Amplifier. https:\/\/software.intel.com\/en-us\/intel-vtune-amplifier-xe."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/11846802_48"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2019.00022"},{"key":"e_1_3_2_1_22_1","unstructured":"Lawrence Livermore National Laboratory. [n.d.]. PAVE: Performance Analysis and Visualization at Exascale. https:\/\/computation.llnl.gov\/project\/performance-analysis-through-visualization\/software.php.  Lawrence Livermore National Laboratory. [n.d.]. PAVE: Performance Analysis and Visualization at Exascale. https:\/\/computation.llnl.gov\/project\/performance-analysis-through-visualization\/software.php."},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. DAPSYS","author":"D.","year":"2000","unstructured":"Allen\u00a0 D. Malony and Sameer Shende. 2000. Performance Technology for Complex Parallel and Distributed Systems . In Proc. DAPSYS 2000 , G. Kotsis and P. Kacsuk (Eds). 37\u201346. Allen\u00a0D. Malony and Sameer Shende. 2000. Performance Technology for Complex Parallel and Distributed Systems. In Proc. DAPSYS 2000, G. Kotsis and P. Kacsuk (Eds). 37\u201346."},{"key":"e_1_3_2_1_24_1","unstructured":"[\n  24\n  ]  Mellanox Integrated Switch Management Solution.[n.d.]. http:\/\/www.mellanox.com\/page\/ib_fabricit_efm_management.  [24] Mellanox Integrated Switch Management Solution.[n.d.]. http:\/\/www.mellanox.com\/page\/ib_fabricit_efm_management."},{"key":"e_1_3_2_1_25_1","unstructured":"[\n  25\n  ]  Message Passing Interface Forum.[n.d.]. http:\/\/www.mpi-forum.org\/Accessed: 2021\/06\/11 14:07:46.  [25] Message Passing Interface Forum.[n.d.]. http:\/\/www.mpi-forum.org\/Accessed: 2021\/06\/11 14:07:46."},{"key":"e_1_3_2_1_26_1","unstructured":"[\n  26\n  ]  OSU InfiniBand Network Analysis and Monitoring Tool.[n.d.]. http:\/\/mvapich.cse.ohio-state.edu\/tools\/osu-inam\/.  [26] OSU InfiniBand Network Analysis and Monitoring Tool.[n.d.]. http:\/\/mvapich.cse.ohio-state.edu\/tools\/osu-inam\/."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2015.68"},{"key":"e_1_3_2_1_28_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","year":"1912","unstructured":"Adam Paszke , Sam Gross , Francisco Massa , Adam Lerer , James Bradbury , Gregory Chanan , Trevor Killeen , Zeming Lin , Natalia Gimelshein , Luca Antiga , Alban Desmaison , Andreas K\u00f6pf , Edward Yang , Zach DeVito , Martin Raison , Alykhan Tejani , Sasank Chilamkurthy , Benoit Steiner , Lu Fang , Junjie Bai , and Soumith Chintala . 2019. PyTorch: An Imperative Style , High-Performance Deep Learning Library . arxiv: 1912 .01703\u00a0[cs.LG] Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward Yang, Zach DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. arxiv:1912.01703\u00a0[cs.LG]"},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the 2007 ACM\/IEEE conference on Supercomputing. 1\u201310","author":"Schulz Martin","year":"2007","unstructured":"Martin Schulz and Bronis\u00a0 R De\u00a0Supinski . 2007 . PN MPI tools: A whole lot greater than the sum of their parts . In Proceedings of the 2007 ACM\/IEEE conference on Supercomputing. 1\u201310 . Martin Schulz and Bronis\u00a0R De\u00a0Supinski. 2007. PN MPI tools: A whole lot greater than the sum of their parts. In Proceedings of the 2007 ACM\/IEEE conference on Supercomputing. 1\u201310."},{"key":"e_1_3_2_1_30_1","unstructured":"Alexander Sergeev and Mike\u00a0Del Balso. 2018. Horovod: fast and easy distributed deep learning in TensorFlow. CoRR abs\/1802.05799(2018). arxiv:1802.05799http:\/\/arxiv.org\/abs\/1802.05799  Alexander Sergeev and Mike\u00a0Del Balso. 2018. Horovod: fast and easy distributed deep learning in TensorFlow. CoRR abs\/1802.05799(2018). arxiv:1802.05799http:\/\/arxiv.org\/abs\/1802.05799"},{"key":"e_1_3_2_1_31_1","unstructured":"Virtual Institute - High Productivity Supercomputing. [n.d.]. HOPSA: A Holistic Performance System Analysis. http:\/\/www.vi-hps.org\/projects\/hopsa\/overview.  Virtual Institute - High Productivity Supercomputing. [n.d.]. HOPSA: A Holistic Performance System Analysis. http:\/\/www.vi-hps.org\/projects\/hopsa\/overview."},{"key":"e_1_3_2_1_32_1","unstructured":"xsede [n.d.]. XSEDE - Extreme Science and Engineering Discovery Environment. https:\/\/www.xsede.org\/.  xsede [n.d.]. XSEDE - Extreme Science and Engineering Discovery Environment. https:\/\/www.xsede.org\/."}],"event":{"name":"PEARC '21: Practice and Experience in Advanced Research Computing","location":"Boston MA USA","acronym":"PEARC '21","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Practice and Experience in Advanced Research Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3437359.3465582","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3437359.3465582","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3437359.3465582","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:03:31Z","timestamp":1750197811000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3437359.3465582"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,17]]},"references-count":32,"alternative-id":["10.1145\/3437359.3465582","10.1145\/3437359"],"URL":"https:\/\/doi.org\/10.1145\/3437359.3465582","relation":{},"subject":[],"published":{"date-parts":[[2021,7,17]]},"assertion":[{"value":"2021-07-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}