{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T11:23:54Z","timestamp":1707823434015},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2009,3,6]],"date-time":"2009-03-06T00:00:00Z","timestamp":1236297600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2009,4]]},"DOI":"10.1007\/s10766-009-0094-9","type":"journal-article","created":{"date-parts":[[2009,3,5]],"date-time":"2009-03-05T11:00:56Z","timestamp":1236250856000},"page":"223-246","source":"Crossref","is-referenced-by-count":11,"title":["A Speculative and Adaptive MPI Rendezvous Protocol Over RDMA-enabled Interconnects"],"prefix":"10.1007","volume":"37","author":[{"given":"Mohammad J.","family":"Rashti","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ahmad","family":"Afsahi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2009,3,6]]},"reference":[{"key":"94_CR1","unstructured":"MPI: A Message-Passing Interface standard, MPI Forum (1997)"},{"key":"94_CR2","doi-asserted-by":"crossref","unstructured":"Goumas, G., Sotiropoulos, A., Koziris, N.: Minimizing completion time for loop tiling with computation and communication overlapping. In: Proceedings of 15th IEEE\/ACM International Parallel and Distributed Processing Symposium (IPDPS\u201901) (2001). doi: 10.1109\/IPDPS.2001.924976","DOI":"10.1109\/IPDPS.2001.924976"},{"key":"94_CR3","doi-asserted-by":"crossref","unstructured":"Fishgold, L., Danalis, A., Pollock, L., Swany, M.: An automated approach to improve communication-computation overlap in clusters. In: 2006 NSF Next Generation Software Workshop (NSFNGS 2006). Proceedings of 20th IEEE\/ACM International Parallel and Distributed Processing Symposium (IPDPS\u201906) (2006). doi: 10.1109\/IPDPS.2006.1639590","DOI":"10.1109\/IPDPS.2006.1639590"},{"issue":"2","key":"94_CR4","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1177\/1094342005054257","volume":"19","author":"R. Brightwell","year":"2005","unstructured":"Brightwell R., Riesen R., Underwood K.D.: Analyzing the impact of overlap, offload, and independent progress for Message Passing Interface applications. Int. J. High Perform. Comput. Appl. 19(2), 103\u2013117 (2005). doi: 10.1177\/1094342005054257","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"94_CR5","doi-asserted-by":"crossref","unstructured":"Rashti, M.J., Afsahi, A.: Assessing the ability of computation\/communication overlap and communication progress in modern interconnects. In: Proceedings of 15th Annual IEEE Symposium on High-Performance Interconnects (Hot Interconnects 2007), pp. 117\u2013124 (2007). doi: 10.1109\/HOTI.2007.12","DOI":"10.1109\/HOTI.2007.12"},{"key":"94_CR6","doi-asserted-by":"crossref","unstructured":"Wagner, A., Jin, H., Panda, D.K., Riesen, R.: NIC-based offload of dynamic user-defined modules for Myrinet clusters. In: Proceedings of 6th IEEE International Conference on Cluster Computing (Cluster\u201904), pp. 205\u2013214 (2004). doi: 10.1109\/CLUSTR.2004.1392618","DOI":"10.1109\/CLUSTR.2004.1392618"},{"key":"94_CR7","doi-asserted-by":"crossref","unstructured":"Sitsky, D., Hayashi, K.: An MPI library which uses polling, interrupts and remote copying for the Fujitsu AP1000+. In: Proceedings of International Symposium on Parallel Architectures, Algorithms, and Networks, pp. 43\u201349 (1996). doi: 10.1109\/ISPAN.1996.508959","DOI":"10.1109\/ISPAN.1996.508959"},{"key":"94_CR8","doi-asserted-by":"crossref","unstructured":"Sur, S., Jin, H., Chai, L., Panda, D.K.: RDMA Read based Rendezvous protocol for MPI over InfiniBand: design alternatives and benefits. In: Proceedings of 11th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP 2006), pp. 32\u201339 (2006). doi: 10.1145\/1122971.1122978","DOI":"10.1145\/1122971.1122978"},{"key":"94_CR9","doi-asserted-by":"crossref","unstructured":"Trahay, F., Denis, A., Aumage, O., Namyst, R.: Improving reactivity and communication overlap in MPI using a generic I\/O manager. In: Proceedings of Euro PVM\/MPI 2007, LNCS 4757, pp. 170\u2013177 (2007)","DOI":"10.1007\/978-3-540-75416-9_27"},{"key":"94_CR10","unstructured":"MPICH2: http:\/\/www-unix.mcs.anl.gov\/mpi\/mpich2\/"},{"key":"94_CR11","doi-asserted-by":"crossref","unstructured":"Rashti, M.J., Afsahi, A.: 10-Gigabit iWARP Ethernet: comparative performance analysis with InfiniBand and Myrinet-10G. In: 7th IEEE Workshop on Communication Architecture for Clusters (CAC\u201907). Proceedings of 21st IEEE\/ACM International Parallel and Distributed Processing Symposium (IPDPS\u201907) (2007). doi: 10.1109\/IPDPS.2007.370480","DOI":"10.1109\/IPDPS.2007.370480"},{"key":"94_CR12","doi-asserted-by":"crossref","unstructured":"Rashti, M.J., Afsahi, A.: Improving communication progress and overlap in MPI Rendezvous protocol over RDMA-enabled interconnects. In: Proceedings of 22nd International Symposium on High Performance Computing Systems and Applications (HPCS 2008), pp. 95\u2013101 (2008). doi: 10.1109\/HPCS.2008.10","DOI":"10.1109\/HPCS.2008.10"},{"key":"94_CR13","unstructured":"National Aeronautics and Space Administration (NASA): NAS Parallel Benchmarks (NPB) for MPI, http:\/\/www.nas.nasa.gov\/Resources\/Software\/npb.html\/"},{"issue":"2","key":"94_CR14","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1016\/S0167-8191(02)00222-3","volume":"29","author":"H. Shan","year":"2003","unstructured":"Shan H., Singh J.P., Oliker L., Biswas R.: Message passing and shared address space parallelism on an SMP cluster. J. Parallel Comput. 29(2), 167\u2013186 (2003). doi: 10.1016\/S0167-8191(02)00222-3","journal-title":"J. Parallel Comput."},{"issue":"2","key":"94_CR15","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1023\/A:1022852505633","volume":"6","author":"F. Petrini","year":"2003","unstructured":"Petrini F., Coll S., Frachtenberg E., Hoisie A.: Performance evaluation of the Quadrics interconnection network. J. Cluster Comput. 6(2), 125\u2013142 (2003). doi: 10.1023\/A:1022852505633","journal-title":"J. Cluster Comput."},{"key":"94_CR16","doi-asserted-by":"crossref","unstructured":"Brightwell, R., Doerfler, D., Underwood, K.D.: A comparison of 4X InfiniBand and Quadrics elan-4 technologies. In: Proceedings of 6th IEEE International Conference on Cluster Computing (Cluster\u201904), pp. 193\u2013204 (2004). doi: 10.1109\/CLUSTR.2004.1392617","DOI":"10.1109\/CLUSTR.2004.1392617"},{"key":"94_CR17","unstructured":"InfiniBand Trade Association, InfiniBand Architecture Specification, vol. 1, October (2004)"},{"issue":"4","key":"94_CR18","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1109\/MM.2005.75","volume":"25","author":"J. Beecroft","year":"2005","unstructured":"Beecroft J., Addison D., Hewson D., McLaren M., Roweth D., Petrini F., Nieplocha J.: QsNetII: Defining high-performance network design. IEEE Micro 25(4), 34\u201347 (2005). doi: 10.1109\/MM.2005.75","journal-title":"IEEE Micro"},{"key":"94_CR19","doi-asserted-by":"crossref","unstructured":"Doerfler, D., Brightwell, R.: Measuring MPI send and receive overhead and application availability in high performance network interfaces. In: Proceedings of EuroPVM\/MPI 2006, LNCS 4192, pp. 331\u2013338 (2006)","DOI":"10.1007\/11846802_46"},{"key":"94_CR20","unstructured":"Liu, J., Chandrasekaran, B., Wu, J., Jiang, W., Kini, S., Yu, W., Buntinas, D., Wyckoff, P., Panda, D.K.: Performance comparison of MPI implementations over InfiniBand, Myrinet and Quadrics. In: Proceedings of 2003 ACM\/IEEE Conference on Supercomputing (SC 2003) (2003). doi: 10.1109\/SC.2003.10007"},{"key":"94_CR21","doi-asserted-by":"crossref","unstructured":"Zamani, R., Qian, Y., Afsahi, A.: An evaluation of the Myrinet\/GM2 two-port networks. In: 3rd IEEE Workshop on High-Speed Local Networks (HSLN 2004). Proceedings of 2004 International Conference on Local Area Networks (LCN 2004), pp. 734\u2013742 (2004). doi: 10.1109\/LCN.2004.20","DOI":"10.1109\/LCN.2004.20"},{"key":"94_CR22","unstructured":"Mellanox Technologies, Inc.: http:\/\/www.mellanox.com\/"},{"key":"94_CR23","unstructured":"Myricom. http:\/\/www.myricom.com\/"},{"key":"94_CR24","unstructured":"NetEffect, Inc.: NetEffect NE020 10Gb iWARP Ethernet channel adapter. http:\/\/www.neteffect.com\/"},{"key":"94_CR25","unstructured":"RDMA Consortium: iWARP protocol specification, http:\/\/www.rdmaconsortium.org\/"},{"key":"94_CR26","doi-asserted-by":"crossref","unstructured":"Amerson, G., Apon, A.: Implementation and design analysis of a network messaging module using virtual interface architecture. In: Proceedings of 6th IEEE International Conference on Cluster Computing (Cluster\u201904), pp. 255\u2013265 (2004). doi: 10.1109\/CLUSTR.2004.1392623","DOI":"10.1109\/CLUSTR.2004.1392623"},{"key":"94_CR27","unstructured":"MVAPICH: http:\/\/mvapich.cse.ohio-state.edu\/index.shtml\/"},{"key":"94_CR28","doi-asserted-by":"crossref","unstructured":"Kumar, R., Mamidala, A.R., Koop, M.J., Santhanaraman, G., Panda, D.K.: Lock-free asynchronous Rendezvous design for MPI point-to-point communication. In: Proceedings of EuroPVM\/MPI 2008, LNCS 5205, pp. 185\u2013193 (2008)","DOI":"10.1007\/978-3-540-87475-1_27"},{"key":"94_CR29","doi-asserted-by":"crossref","unstructured":"Pakin, S.: Receiver-initiated message passing over RDMA networks. In: Proceedings of 22nd IEEE International Parallel and Distributed Processing Symposium (IPDPS 2008) (2008). doi: 10.1109\/IPDPS.2008.4536262","DOI":"10.1109\/IPDPS.2008.4536262"},{"issue":"5","key":"94_CR30","doi-asserted-by":"crossref","first-page":"559","DOI":"10.1147\/rd.515.0559","volume":"51","author":"T. Chen","year":"2007","unstructured":"Chen T., Raghavan R., Dale J.N., Iwata E.: Cell Broadband Engine architecture and its first implementation\u2014a performance view. IBM J. Res. Develop. 51(5), 559\u2013572 (2007)","journal-title":"IBM J. Res. Develop."}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-009-0094-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10766-009-0094-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-009-0094-9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,30]],"date-time":"2019-05-30T19:59:25Z","timestamp":1559246365000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10766-009-0094-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,3,6]]},"references-count":30,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2009,4]]}},"alternative-id":["94"],"URL":"https:\/\/doi.org\/10.1007\/s10766-009-0094-9","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"value":"0885-7458","type":"print"},{"value":"1573-7640","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,3,6]]}}}