{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T12:13:38Z","timestamp":1763468018161,"version":"3.38.0"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2010,10,24]],"date-time":"2010-10-24T00:00:00Z","timestamp":1287878400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2011,8]]},"DOI":"10.1007\/s10766-010-0152-3","type":"journal-article","created":{"date-parts":[[2010,10,23]],"date-time":"2010-10-23T06:10:06Z","timestamp":1287814206000},"page":"473-493","source":"Crossref","is-referenced-by-count":9,"title":["Process Arrival Pattern Aware Alltoall and Allgather on InfiniBand Clusters"],"prefix":"10.1007","volume":"39","author":[{"given":"Ying","family":"Qian","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ahmad","family":"Afsahi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2010,10,24]]},"reference":[{"key":"152_CR1","unstructured":"MPI: A Message Passing Interface standard (1997)"},{"issue":"6","key":"152_CR2","doi-asserted-by":"crossref","first-page":"543","DOI":"10.1007\/s10766-008-0070-9","volume":"36","author":"A. Faraj","year":"2008","unstructured":"Faraj A., Patarasuk P., Yuan X.: A study of process arrival patterns for MPI collective operations. Int. J. Parallel Program. 36(6), 543\u2013570 (2008)","journal-title":"Int. J. Parallel Program."},{"key":"152_CR3","doi-asserted-by":"crossref","unstructured":"Patarasuk, P., Yuan, X.: Efficient MPI_Bcast across different process arrival patterns. In: Proceedings 22nd International Parallel and Distributed Processing Symposium (IPDPS). (2008)","DOI":"10.1109\/IPDPS.2008.4536308"},{"key":"152_CR4","unstructured":"InfiniBand Architecture, http:\/\/www.infinibandta.org"},{"key":"152_CR5","doi-asserted-by":"crossref","unstructured":"Qian, Y., Afsahi, A.: Process arrival pattern and shared memory aware alltoall on InfiniBand. 16th EuroPVM\/MPI Lecture Notes in Computer Science (LNCS 5759), pp. 250\u2013260. (2009)","DOI":"10.1007\/978-3-642-03770-2_31"},{"key":"152_CR6","unstructured":"MVAPICH, http:\/\/www.mvapich.cse.ohio-state.edu"},{"key":"152_CR7","unstructured":"ConnectX InfiniBand Adapters, product brief, Mellanox Technologies, Inc. http:\/\/www.mellanox.com\/pdf\/products\/hca\/ConnectX_IB_Card.pdf"},{"key":"152_CR8","unstructured":"Virtual Protocol Interconnect (VPI), product brief, Mellanox Technologies, Inc. http:\/\/www.mellanox.com\/related-docs\/prod_adapter_cards\/PB_ConnectX_VPI.pdf"},{"issue":"11","key":"152_CR9","doi-asserted-by":"crossref","first-page":"1143","DOI":"10.1109\/71.642949","volume":"8","author":"J. Bruck","year":"1997","unstructured":"Bruck J., Ho C.-T., Kipnis S., Upfal E., Weathersby D.: Efficient algorithms for all-to-all communications in multiport message-passing systems. IEEE Trans. Parallel Distrib. Syst. 8(11), 1143\u20131156 (1997)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"1","key":"152_CR10","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1177\/1094342005051521","volume":"19","author":"R. Thakur","year":"2005","unstructured":"Thakur R., Rabenseifner R., Gropp W.: Optimization of collective communication operations in MPICH. Int. J. High Perform. Comput. Appl. 19(1), 49\u201366 (2005)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"152_CR11","doi-asserted-by":"crossref","unstructured":"Sur, S., Bondhugula, U.K.R., Mamidala, A., Jin, H.-W., Panda, D.K.: High performance RDMA based all-to-all broadcast for InfiniBand clusters. In: Proceedings 12th International Conference on High Performance Computing (HiPC). (2005)","DOI":"10.1007\/11602569_19"},{"key":"152_CR12","unstructured":"NAS Benchmarks, version 2.4, http:\/\/www.nas.nasa.gov\/Resources\/Software\/npb.html"},{"issue":"4","key":"152_CR13","first-page":"341","volume":"11","author":"Y. Qian","year":"2008","unstructured":"Qian Y., Afsahi A.: Efficient shared memory and RDMA based collectives on multi-rail QsNetII SMP clusters. Cluster Comput. J. Networks Softw. Tools Appl. 11(4), 341\u2013354 (2008)","journal-title":"Cluster Comput. J. Networks Softw. Tools Appl."},{"key":"152_CR14","unstructured":"Tipparaju, V., Nieplocha, J., Panda, D.K.: Fast collective operations using shared and remote memory access protocols on clusters. In: Proceedings 17th International Parallel and Distributed Processing Symposium (IPDPS). (2003)"},{"key":"152_CR15","unstructured":"Qian, Y., Rashti, M.J., Afsahi, A.: Multi-connection and multi-core aware all-gather on InfiniBand clusters. In: Proceedings 20th IASTED International Conference on Parallel and Distributed Computing and Systems (PDCS), pp. 245\u2013251. (2008)"},{"key":"152_CR16","unstructured":"OpenFabrics Alliance Homepage, http:\/\/www.openfabrics.org"},{"issue":"2","key":"152_CR17","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1016\/S0167-8191(02)00222-3","volume":"29","author":"H. Shan","year":"2003","unstructured":"Shan H., Singh J.P., Oliker L., Biswas R.: Message passing and shared address space parallelism on an SMP cluster. Parallel Comput. 29(2), 167\u2013186 (2003)","journal-title":"Parallel Comput."},{"key":"152_CR18","unstructured":"MPICH, http:\/\/www.mcs.anl.gov\/research\/projects\/mpich2"},{"key":"152_CR19","doi-asserted-by":"crossref","unstructured":"Vadhiyar, S.S., Fagg, G.E., Dongarra, J.: Automatically tuned collective communications. In: Proceedings 2000 ACM\/IEEE Conference on Supercomputing (SC). (2000)","DOI":"10.1109\/SC.2000.10024"},{"key":"152_CR20","doi-asserted-by":"crossref","unstructured":"Buntinas, D., Mercier, G., Gropp, W.: Data transfers between processes in an SMP system: performance study and application to MPI. In: Procedings 35th International Conference on Parallel Processing (ICPP), pp. 487\u2013496. (2006)","DOI":"10.1109\/ICPP.2006.31"},{"key":"152_CR21","doi-asserted-by":"crossref","unstructured":"Sistare, S., vandeVaart, R., Loh, E.: Optimization of MPI collectives on clusters of large-scale SMPs. In: Proceedings 1999 ACM\/IEEE Conference on Supercomputing (SC). (1999)","DOI":"10.1145\/331532.331555"},{"key":"152_CR22","doi-asserted-by":"crossref","unstructured":"Mamidala, A.R., Chai, L., Jin, H.-W., Panda, D.K.: Efficient SMP-aware MPI-level broadcast over InfiniBand\u2019s hardware multicast. Workshop on Communication Architecture on Clusters (CAC). In: Proceedings of 20th International Parallel and Distributed Processing Symposium (IPDPS). Pittsburgh, PA (2006)","DOI":"10.1109\/IPDPS.2006.1639562"},{"key":"152_CR23","unstructured":"Wu, M., Kendall, R.A., Wright, K.: Optimizing collective communications on SMP clusters. In: Proceedings 34th International Conference on Parallel Processing (ICPP), pp. 399\u2013407. (2005)"},{"key":"152_CR24","doi-asserted-by":"crossref","unstructured":"Traff, J.L.: Efficient allgather for regular SMP-clusters. In: Proceedings EuroPVM\/MPI, pp. 58\u201365. (2006)","DOI":"10.1007\/11846802_16"},{"key":"152_CR25","doi-asserted-by":"crossref","unstructured":"Ritzdorf, H., Traff, J.L.: Collective operations in NEC\u2019s high-performance MPI libraries. In: Proceedings 20th International Parallel and Distributed Processing Symposium (IPDPS). (2006)","DOI":"10.1109\/IPDPS.2006.1639334"},{"key":"152_CR26","doi-asserted-by":"crossref","unstructured":"Sur, S., Jin, H.-W., Panda, D.K.: Efficient and scalable all-to-all personalized exchange for InfiniBand clusters. In: Proceedings 33rd International Conference on Parallel Processing (ICCP), pp. 275\u2013282. (2004)","DOI":"10.1109\/ICPP.2004.1327932"},{"key":"152_CR27","doi-asserted-by":"crossref","unstructured":"Mamidala, A.R., Vishnu, A., Panda, D.K.: Efficient shared memory and RDMA based design for MPI-allgather over InfiniBand. In: Proceedings EuroPVM\/MPI, pp. 66\u201375. (2006)","DOI":"10.1007\/11846802_17"},{"key":"152_CR28","doi-asserted-by":"crossref","unstructured":"Mamidala, A., Kumar, R., De, D., Panda, D.K.: MPI collectives on modern multicore clusters: performance optimizations and communication characteristics. In: Proceedings 8th International Symposium on Cluster Computing and the Grid (CCGrid). (2008)","DOI":"10.1109\/CCGRID.2008.87"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-010-0152-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10766-010-0152-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-010-0152-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T03:56:45Z","timestamp":1740628605000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10766-010-0152-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,10,24]]},"references-count":28,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2011,8]]}},"alternative-id":["152"],"URL":"https:\/\/doi.org\/10.1007\/s10766-010-0152-3","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"type":"print","value":"0885-7458"},{"type":"electronic","value":"1573-7640"}],"subject":[],"published":{"date-parts":[[2010,10,24]]}}}