{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T05:22:42Z","timestamp":1761110562520,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030507428"},{"type":"electronic","value":"9783030507435"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-50743-5_26","type":"book-chapter","created":{"date-parts":[[2020,6,15]],"date-time":"2020-06-15T19:03:45Z","timestamp":1592247825000},"page":"517-535","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Communication-Aware Hardware-Assisted MPI Overlap Engine"],"prefix":"10.1007","author":[{"given":"Mohammadreza","family":"Bayatpour","sequence":"first","affiliation":[]},{"given":"Jahanzeb","family":"Hashmi Maqbool","sequence":"additional","affiliation":[]},{"given":"Sourav","family":"Chakraborty","sequence":"additional","affiliation":[]},{"given":"Kaushik","family":"Kandadi Suresh","sequence":"additional","affiliation":[]},{"given":"Seyedeh Mahdieh","family":"Ghazimirsaeed","sequence":"additional","affiliation":[]},{"given":"Bharath","family":"Ramesh","sequence":"additional","affiliation":[]},{"given":"Hari","family":"Subramoni","sequence":"additional","affiliation":[]},{"given":"Dhabaleswar K.","family":"Panda","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,6,15]]},"reference":[{"key":"26_CR1","unstructured":"Osu Micro-benchmarks (2017). http:\/\/mvapich.cse.ohio-state.edu\/benchmarks"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Alexandrov, A., Ionescu, M.F., Schauser, K.E., Scheiman, C.: LogGP: incorporating long messages into the LogP model \u2013 one step closer towards a realistic model for parallel computation. Tech. rep., Santa Barbara, CA, USA (1995)","DOI":"10.1145\/215399.215427"},{"key":"26_CR3","unstructured":"Atomic, L.S., Simulator, M.M.P.: LAMMPS (2013). https:\/\/lammps.sandia.gov\/"},{"key":"26_CR4","doi-asserted-by":"publisher","unstructured":"Bayatpour, M., Subramoni, H., Chakraborty, S., Panda, D.K.: Adaptive and dynamic design for MPI tag matching. In: 2016 IEEE International Conference on Cluster Computing (CLUSTER), pp. 1\u201310 (September 2016). https:\/\/doi.org\/10.1109\/CLUSTER.2016.69","DOI":"10.1109\/CLUSTER.2016.69"},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Bayatpour, M., Ghazimirsaeed, S.M., Xu, S., Subramoni, H., Panda, D.K.: Design and characterization of infiniband hardware tag matching in MPI. In: 20th Annual IEEE\/ACM International Symposium in Cluster, Cloud, and Grid Computing (Accepted to be published) (2020)","DOI":"10.1109\/CCGrid49817.2020.00-83"},{"key":"26_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1007\/978-3-540-39924-7_46","volume-title":"Recent Advances in Parallel Virtual Machine and Message Passing Interface","author":"R Brightwell","year":"2003","unstructured":"Brightwell, R., Underwood, K.: Evaluation of an eager protocol optimization for MPI. In: Dongarra, J., Laforenza, D., Orlando, S. (eds.) EuroPVM\/MPI 2003. LNCS, vol. 2840, pp. 327\u2013334. Springer, Heidelberg (2003). https:\/\/doi.org\/10.1007\/978-3-540-39924-7_46"},{"key":"26_CR7","doi-asserted-by":"crossref","unstructured":"Chakraborty, S., Bayatpour, M., Hashmi, J., Subramoni, H., Panda, D.K.: Cooperative rendezvous protocols for improved performance and overlap. In: SC18: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 361\u2013373. IEEE (2018)","DOI":"10.1109\/SC.2018.00031"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Dosanjh, M.G., et al.: The case for semi-permanent cache occupancy: understanding the impact of data locality on network processing. In: Proceedings of the 47th International Conference on Parallel Processing, p. 73. ACM (2018)","DOI":"10.1145\/3225058.3225130"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Dosanjh, M.G., Schonbein, W., Grant, R.E., Bridges, P.G., Ghazimirsaeed, S.M., Afsahi, A.: Fuzzy matching: hardware accelerated MPI communication middleware. In: 19th Annual IEEE\/ACM International Symposium in Cluster, Cloud, and Grid Computing (CCGrid 2019) (2019)","DOI":"10.1109\/CCGRID.2019.00035"},{"key":"26_CR10","doi-asserted-by":"publisher","first-page":"102547","DOI":"10.1016\/j.parco.2019.102547","volume":"89","author":"M Ghazimirsaeed","year":"2019","unstructured":"Ghazimirsaeed, M., Grant, R., Afsahi, A.: A dynamic, unified design for dedicated message matching engines for collective and point-to-point communications. Parallel Comput. 89, 102547 (2019)","journal-title":"Parallel Comput."},{"key":"26_CR11","unstructured":"Ghazimirsaeed, S.M., Afsahi, A.: Accelerating MPI message matching by a data clustering strategy. In: High Performance Computing Symposium (HPCS 2017). Kingston (2017)"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Ghazimirsaeed, S.M., Grant, R.E., Afsahi, A.: A dedicated message matching mechanism for collective communications. In: Proceedings of the 47th International Conference on Parallel Processing Companion, p. 26. ACM (2018)","DOI":"10.1145\/3229710.3229712"},{"key":"26_CR13","doi-asserted-by":"publisher","first-page":"e4862","DOI":"10.1002\/cpe.4862","volume":"32","author":"SM Ghazimirsaeed","year":"2019","unstructured":"Ghazimirsaeed, S.M., Mirsadeghi, S.H., Afsahi, A.: Communication-aware message matching in MPI. Concurr. Comput.: Pract. Exp. 32, e4862 (2019)","journal-title":"Concurr. Comput.: Pract. Exp."},{"key":"26_CR14","doi-asserted-by":"publisher","unstructured":"Graham, R.L., et al.: Overlapping computation and communication: barrier algorithms and ConnectX-2 Core-Direct capabilities. In: 2010 IEEE International Symposium on Parallel Distributed Processing, Workshops and PhD Forum (IPDPSW), pp. 1\u20138 (April 2010). https:\/\/doi.org\/10.1109\/IPDPSW.2010.5470854","DOI":"10.1109\/IPDPSW.2010.5470854"},{"key":"26_CR15","doi-asserted-by":"crossref","unstructured":"Hoefler, T., Siebert, C., Lumsdaine, A.: Group operation assembly language - a flexible way to express collective communication. In: ICPP-2009 - The 38th International Conference on Parallel Processing. IEEE (September 2009)","DOI":"10.1109\/ICPP.2009.70"},{"issue":"3\u20134","key":"26_CR16","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1007\/s00450-011-0170-4","volume":"26","author":"K Kandalla","year":"2011","unstructured":"Kandalla, K., Subramoni, H., Tomko, K., Pekurovsky, D., Sur, S., Panda, D.K.: High-performance and scalable non-blocking all-to-all with collective offload on infiniband clusters: a study with parallel 3D FFT. Comput. Sci.-Res. Dev. 26(3\u20134), 237 (2011)","journal-title":"Comput. Sci.-Res. Dev."},{"key":"26_CR17","doi-asserted-by":"crossref","unstructured":"Venkata, M., et al.: ConnectX-2 CORE-Direct enabled asynchronous broadcast collective communications. In: Proceedings of the 25th IEEE International Parallel and Distributed Processing Symposium, Workshops (2011)","DOI":"10.1109\/IPDPS.2011.221"},{"key":"26_CR18","unstructured":"Message Passing Interface Forum: MPI: A Message-Passing Interface Standard (March 1994)"},{"key":"26_CR19","unstructured":"MVAPICH: MPI over InfiniBand, Omni-Path, Ethernet\/iWARP, and RoCE (2017). http:\/\/mvapich.cse.ohio-state.edu\/"},{"issue":"4","key":"26_CR20","doi-asserted-by":"publisher","first-page":"C192","DOI":"10.1137\/11082748X","volume":"34","author":"D Pekurovsky","year":"2012","unstructured":"Pekurovsky, D.: P3DFFT: a framework for parallel computations of fourier transforms in three dimensions. SIAM J. Sci. Comput. 34(4), C192\u2013C209 (2012)","journal-title":"SIAM J. Sci. Comput."},{"key":"26_CR21","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1016\/j.parco.2019.03.003","volume":"85","author":"A Ruhela","year":"2019","unstructured":"Ruhela, A., Subramoni, H., Chakraborty, S., Bayatpour, M., Kousha, P., Panda, D.K.D.: Efficient design for MPI asynchronous progress without dedicated resources. Parallel Comput. 85, 13\u201326 (2019)","journal-title":"Parallel Comput."},{"key":"26_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1007\/978-3-642-23397-5_26","volume-title":"Euro-Par 2011 Parallel Processing","author":"T Schneider","year":"2011","unstructured":"Schneider, T., Eckelmann, S., Hoefler, T., Rehm, W.: Kernel-based offload of collective operations \u2013 implementation, evaluation and lessons learned. In: Jeannot, E., Namyst, R., Roman, J. (eds.) Euro-Par 2011. LNCS, vol. 6853, pp. 264\u2013275. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-23397-5_26"},{"key":"26_CR23","doi-asserted-by":"publisher","unstructured":"Subramoni, H., Kandalla, K., Sur, S., Panda, D.K.: Design and evaluation of generalized collective communication primitives with overlap using ConnectX-2 offload engine. In: 2010 18th IEEE Symposium on High Performance Interconnects, pp. 40\u201349 (August 2010). https:\/\/doi.org\/10.1109\/HOTI.2010.22","DOI":"10.1109\/HOTI.2010.22"},{"key":"26_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"434","DOI":"10.1007\/978-3-319-20119-1_31","volume-title":"High Performance Computing","author":"H Subramoni","year":"2015","unstructured":"Subramoni, H., et al.: Designing non-blocking personalized collectives with near perfect overlap for RDMA-enabled clusters. In: Kunkel, J.M., Ludwig, T. (eds.) ISC High Performance 2015. LNCS, vol. 9137, pp. 434\u2013453. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-20119-1_31"},{"key":"26_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"334","DOI":"10.1007\/978-3-319-58667-0_18","volume-title":"High Performance Computing","author":"H Subramoni","year":"2017","unstructured":"Subramoni, H., Chakraborty, S., Panda, D.K.: Designing dynamic and adaptive MPI point-to-point communication protocols for efficient overlap of computation and communication. In: Kunkel, J.M., Yokota, R., Balaji, P., Keyes, D. (eds.) ISC 2017. LNCS, vol. 10266, pp. 334\u2013354. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-58667-0_18"},{"key":"26_CR26","unstructured":"Unified Communication X (2019). http:\/\/www.openucx.org\/"},{"key":"26_CR27","doi-asserted-by":"crossref","unstructured":"Venkatesh, A., et al.: A case for application-oblivious energy-efficient MPI runtime. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2015, pp. 1\u201312. IEEE (2015)","DOI":"10.1145\/2807591.2807658"},{"key":"26_CR28","doi-asserted-by":"publisher","unstructured":"Yu, W., Buntinas, D., Panda, D.K.: High performance and reliable NIC-based multicast over Myrinet\/GM-2. In: Proceedings of 2003 International Conference on Parallel Processing, 2003, pp. 197\u2013204 (October 2003). https:\/\/doi.org\/10.1109\/ICPP.2003.1240581","DOI":"10.1109\/ICPP.2003.1240581"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-50743-5_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,18]],"date-time":"2023-12-18T20:04:55Z","timestamp":1702929895000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-50743-5_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030507428","9783030507435"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-50743-5_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"15 June 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISC High Performance","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on High Performance Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Frankfurt am Main","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 June 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 June 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"35","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"supercomputing2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.isc-hpc.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Linklings","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"87","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.73","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.33","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}