{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,8,30]],"date-time":"2023-08-30T10:07:33Z","timestamp":1693390053562},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2016,4,4]],"date-time":"2016-04-04T00:00:00Z","timestamp":1459728000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Institute for the Promotion of Innovation through Science and Technology in Flanders (IWT)"},{"name":"iMinds Institute"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2016,5]]},"DOI":"10.1007\/s11227-016-1707-x","type":"journal-article","created":{"date-parts":[[2016,4,4]],"date-time":"2016-04-04T17:09:42Z","timestamp":1459789782000},"page":"1973-2013","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["A novel MPI reduction algorithm resilient to imbalances in process arrival times"],"prefix":"10.1007","volume":"72","author":[{"given":"P.","family":"Marendic","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J.","family":"Lemeire","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"D.","family":"Vucinic","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"P.","family":"Schelkens","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,4,4]]},"reference":[{"issue":"7","key":"1707_CR1","doi-asserted-by":"crossref","first-page":"2516","DOI":"10.1007\/s11227-015-1402-3","volume":"71","author":"E Meneses","year":"2015","unstructured":"Meneses E, Kal LV (2015) Camel: collective-aware message logging. J Supercomput 71(7):2516\u20132538. doi: 10.1007\/s11227-015-1402-3","journal-title":"J Supercomput"},{"key":"1707_CR2","doi-asserted-by":"crossref","unstructured":"Ferreira KB, Bridges P, Brightwell R (2008) Characterizing application sensitivity to OS interference using kernel-level noise injection. In: Proceedings of the 2008 ACM\/IEEE conference on supercomputing (SC\u201908). IEEE Press, Piscataway, pp 19:1\u201319:12","DOI":"10.1109\/SC.2008.5219920"},{"key":"1707_CR3","doi-asserted-by":"crossref","unstructured":"Faraj A, Patarasuk P, Yuan X (2008) A study of process arrival patterns for MPI collective operations. Int J Parallel Program 36(6):571\u2013591","DOI":"10.1007\/s10766-008-0070-9"},{"key":"1707_CR4","doi-asserted-by":"crossref","unstructured":"Huang C, Lawlor O, Kale LV (2004) Adaptive MPI. In: Languages and compilers for parallel computing. Springer, New York, pp 306\u2013322","DOI":"10.1007\/978-3-540-24644-2_20"},{"key":"1707_CR5","doi-asserted-by":"crossref","unstructured":"Mamidala A, Liu J, Panda DK (2004) Efficient barrier and allreduce on infiniband clusters using multicast and adaptive algorithms. In: Proceedings of the 2004 IEEE international conference on cluster computing (CLUSTER\u201904). IEEE Computer Society, Washington, DC, pp 135\u2013144","DOI":"10.1109\/CLUSTR.2004.1392611"},{"key":"1707_CR6","doi-asserted-by":"crossref","unstructured":"Patarasuk P, Yuan X (2008) Efficient MPI bcast across different process arrival patterns. Parallel Distrib Process Symp Int 0:1\u201311. http:\/\/doi.ieeecomputersociety.org\/10.1109\/IPDPS.2008.4536308","DOI":"10.1109\/IPDPS.2008.4536308"},{"key":"1707_CR7","unstructured":"Qian Y (2010) Design and evaluation of efficient collective communications on modern interconnects and multi-core clusters. Ph.D. thesis, Queen\u2019s University, Kingston"},{"key":"1707_CR8","unstructured":"Message Passing Interface Forum, $${\\sf MPI}$$ MPI (2016) A Message-Passing Interface Standard. Version 3.1. http:\/\/www.mpi-forum.org\/docs\/mpi-3.1\/ . Accessed 4 June 2015"},{"key":"1707_CR9","doi-asserted-by":"crossref","unstructured":"Karp RM, Sahay A, Santos EE, Schauser KE (1993) Optimal broadcast and summation in the LogP model. In: Proceedings of the fifth annual ACM symposium on parallel algorithms and architectures. ACM, New York, pp 142\u2013153","DOI":"10.1145\/165231.165250"},{"key":"1707_CR10","unstructured":"Louis-Claude\u00a0Canon GA (2012) Scheduling associative reductions with homogenous costs when overlapping communications and computations. Tech. Rep. 7898, Inria"},{"key":"1707_CR11","doi-asserted-by":"crossref","unstructured":"Rabenseifner R (2004) Optimization of collective reduction operations. In: Procs. of int. conf. on computational science (ICCS), pp 1\u20139","DOI":"10.1007\/978-3-540-24685-5_1"},{"key":"1707_CR12","doi-asserted-by":"crossref","unstructured":"Rabenseifner R, Trff JL (2004) More efficient reduction algorithms for non-power-of-two number of processors in message-passing parallel systems. In: EuroPVM\/MPI, pp 36\u201346","DOI":"10.1007\/978-3-540-30218-6_13"},{"issue":"2","key":"1707_CR13","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1016\/j.jpdc.2008.09.002","volume":"69","author":"P Patarasuk","year":"2009","unstructured":"Patarasuk P, Yuan X (2009) Bandwidth optimal all-reduce algorithms for clusters of workstations. J Parallel Distrib Comput 69(2):117\u2013124","journal-title":"J Parallel Distrib Comput"},{"key":"1707_CR14","doi-asserted-by":"crossref","unstructured":"Jain N, Sabharwal Y (2010) Optimal bucket algorithms for large MPI collectives on torus interconnects. In: Proceedings of the 24th ACM international conference on supercomputing. ACM, New York, pp 27\u201336","DOI":"10.1145\/1810085.1810093"},{"issue":"13","key":"1707_CR15","doi-asserted-by":"crossref","first-page":"1749","DOI":"10.1002\/cpe.1206","volume":"19","author":"E Chan","year":"2007","unstructured":"Chan E, Heimlich M, Purkayastha A, van de Geijn R (2007) Collective communication: theory, practice, and experience. Concurr Comput Pract Exp 19(13):1749\u20131783","journal-title":"Concurr Comput Pract Exp"},{"key":"1707_CR16","doi-asserted-by":"crossref","unstructured":"Peterka T, Goodell D, Ross R, Shen H-W, Thakur R (2009) A configurable algorithm for parallel image-compositing applications. In: Proceedings of the conference on high performance computing networking, storage and analysis (SC\u201909). ACM, New York, pp 4:1\u20134:10. doi: 10.1145\/1654059.1654064","DOI":"10.1145\/1654059.1654064"},{"key":"1707_CR17","unstructured":"Kendall W, Peterka T, Huang J, Shen H-W, Ross R (2010) Accelerating and benchmarking Radix- $$k$$ k image compositing at large scale. In: Proceedings of the 10th eurographics conference on parallel graphics and visualization (EG PGV\u201910). Eurographics Association, Aire-la-Ville, pp 101\u2013110. doi: 10.2312\/EGPGV\/EGPGV10\/101-110"},{"key":"1707_CR18","doi-asserted-by":"crossref","unstructured":"Pjesivac-Grbovic J, Angskun T, Bosilca G, Fagg GE, Dongarra GJJ (2005) Performance analysis of MPI collective operations. In: IEEE international parallel and distributed processing symposium","DOI":"10.1109\/IPDPS.2005.335"},{"key":"1707_CR19","doi-asserted-by":"crossref","unstructured":"Sanders P, Speck J, Trff JL (2009) Two-tree algorithms for full bandwidth broadcast, reduction and scan. Parallel Comput 35(12):581\u2013594. (Selected papers from the 14th European PVM\/MPI users group meeting)","DOI":"10.1016\/j.parco.2009.09.001"},{"issue":"1","key":"1707_CR20","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1177\/1094342005051521","volume":"19","author":"R Thakur","year":"2005","unstructured":"Thakur R, Rabenseifner R, Gropp W (2005) Optimization of collective communication operations in MPICH. Int J High Perform Comput Appl 19(1):49\u201366","journal-title":"Int J High Perform Comput Appl"},{"issue":"2","key":"1707_CR21","first-page":"58","volume":"1","author":"T Hoefler","year":"2014","unstructured":"Hoefler T, Moor D (2014) Energy, memory, and runtime tradeoffs for implementing collective communication operations. J Supercomput Front Innov 1(2):58\u201375","journal-title":"J Supercomput Front Innov"},{"key":"1707_CR22","unstructured":"Fabrizio\u00a0Petrini SP, Kerbyson Darren J (2003) The case of the missing supercomputer performance: achieving optimal performance on the 8,192 processors of ASCI Q. In: Proceedings of the 2003 ACM\/IEEE conference on supercomputing (SC\u201903), p 55"},{"key":"1707_CR23","doi-asserted-by":"crossref","unstructured":"Agarwal S, Garg R, Vishnoi NK (2005) The impact of noise on the scaling of collectives: a theoretical approach. In: Proceedings of the 12th international conference on high performance computing (HiPC\u201905). Springer, Berlin, pp 280\u2013289. doi: 10.1007\/11602569_31","DOI":"10.1007\/11602569_31"},{"key":"1707_CR24","doi-asserted-by":"crossref","unstructured":"Hoefler T, Schneider T, Lumsdaine A (2010) Characterizing the influence of system noise on large-scale applications by simulation. In: Proceedings of the 2010 ACM\/IEEE international conference for high performance computing, networking, storage and analysis (SC\u201910). IEEE Computer Society, Washington, DC, pp 1\u201311. doi: 10.1109\/SC.2010.12","DOI":"10.1109\/SC.2010.12"},{"key":"1707_CR25","doi-asserted-by":"crossref","unstructured":"Ghysels P, Ashby TJ, Meerbergen K, Vanroose W (2013) Hiding global communication latency in the gmres algorithm on massively parallel machines. SIAM J Sci Comput 35(1):C48\u2013C71","DOI":"10.1137\/12086563X"},{"issue":"1","key":"1707_CR26","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1007\/s10586-011-0178-3","volume":"16","author":"KB Ferreira","year":"2013","unstructured":"Ferreira KB, Bridges PG, Brightwell R, Pedretti KT (2013) The impact of system design parameters on application noise sensitivity. Clust Comput 16(1):117\u2013129","journal-title":"Clust Comput"},{"key":"1707_CR27","unstructured":"Eichenberger AE, Abraham SG (1995) Impact of load imbalance on the design of software barriers. In: Proceedings of the 1995 international conference on parallel processing, pp 63\u201372"},{"key":"1707_CR28","doi-asserted-by":"crossref","unstructured":"Marendic P, Lemeire J, Haber T, Vucinic D, Schelkens P (2012) An investigation into the performance of reduction algorithms under load imbalance. In: Kaklamanis C, Papatheodorou T, Spirakis P (eds) Euro-Par 2012 parallel processing. Lecture notes in computer science, vol 7484. Springer, Berlin, pp 439\u2013450. doi: 10.1007\/978-3-642-32820-6_44","DOI":"10.1007\/978-3-642-32820-6_44"},{"key":"1707_CR29","doi-asserted-by":"crossref","unstructured":"Chan EW, Heimlich MF, Purkayastha A, Van De\u00a0Geijn RA (2004) On optimizing collective communication. In: 2004 IEEE international conference on cluster computing. IEEE, pp 145\u2013155","DOI":"10.1109\/CLUSTR.2004.1392612"},{"issue":"7","key":"1707_CR30","doi-asserted-by":"crossref","first-page":"887","DOI":"10.1016\/j.jpdc.2007.12.001","volume":"68","author":"JL Tr\u00e4ff","year":"2008","unstructured":"Tr\u00e4ff JL, Ripke A (2008) Optimal broadcast for fully connected processor-node networks. J Parallel Distrib Comput 68(7):887\u2013901","journal-title":"J Parallel Distrib Comput"},{"key":"1707_CR31","doi-asserted-by":"crossref","unstructured":"Lastovetsky A, Rychkov V, OFlynn M, Mpiblib (2008) Benchmarking MPI communications for parallel computing on homogeneous and heterogeneous clusters. In: Recent advances in parallel virtual machine and message passing interface. Springer, New York, pp 227\u2013238","DOI":"10.1007\/978-3-540-87475-1_32"},{"issue":"3","key":"1707_CR32","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1016\/S0167-8191(06)80021-9","volume":"20","author":"RW Hockney","year":"1994","unstructured":"Hockney RW (1994) The communication challenge for MPP: Intel Paragon and Meiko CS-2. Parallel Comput 20(3):389\u2013398. doi: 10.1016\/S0167-8191(06)80021-9","journal-title":"Parallel Comput"},{"issue":"1\u20134","key":"1707_CR33","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1007\/BF01840439","volume":"1","author":"ML Fredman","year":"1986","unstructured":"Fredman ML, Sedgewick R, Sleator DD, Tarjan RE (1986) The pairing heap: a new form of self-adjusting heap. Algorithmica 1(1\u20134):111\u2013129","journal-title":"Algorithmica"},{"key":"1707_CR34","doi-asserted-by":"crossref","unstructured":"Pettie S (2005) Towards a final analysis of pairing heaps. In: 46th annual IEEE symposium on foundations of computer science (FOCS\u201905). IEEE, pp 174\u2013183","DOI":"10.1109\/SFCS.2005.75"},{"issue":"4","key":"1707_CR35","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1109\/38.291532","volume":"14","author":"K-L Ma","year":"1994","unstructured":"Ma K-L, Painter JS, Hansen CD, Krogh MF (1994) Parallel volume rendering using binary-swap compositing. Comput Graph Appl IEEE 14(4):59\u201368","journal-title":"Comput Graph Appl IEEE"},{"issue":"2","key":"1707_CR36","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1023\/A:1008165001515","volume":"18","author":"D-L Yang","year":"2001","unstructured":"Yang D-L, Yu J-C, Chung Y-C (2001) Efficient compositing methods for the sort-last-sparse parallel volume rendering system on distributed memory multicomputers. J Supercomput 18(2):201\u2013220. doi: 10.1023\/A:1008165001515","journal-title":"J Supercomput"},{"key":"1707_CR37","doi-asserted-by":"crossref","unstructured":"Gropp W, Lusk E (1999) Reproducible measurements of MPI performance characteristics. Springer, New York, pp 11\u201318","DOI":"10.1007\/3-540-48158-3_2"},{"key":"1707_CR38","unstructured":"Corporation I (2013) Intel MPI benchmarks 4.1. https:\/\/software.intel.com\/en-us\/articles\/intel-mpi-benchmarks . Accessed 1 April 2016"},{"issue":"4","key":"1707_CR39","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1080\/17445760902894688","volume":"25","author":"T Hoefler","year":"2010","unstructured":"Hoefler T, Schneider T, Lumsdaine A (2010) Accurately measuring overhead, communication time and progression of blocking and nonblocking collective operations at massive scale. Int J Parallel Emerg Distrib Syst 25(4):241\u2013258. doi: 10.1080\/17445760902894688","journal-title":"Int J Parallel Emerg Distrib Syst"},{"issue":"4","key":"1707_CR40","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1080\/17445760902894688","volume":"25","author":"TST Hoefler","year":"2010","unstructured":"Hoefler TST, Lumsdaine A (2010) Accurately measuring overhead, communication time and progression of blocking and nonblocking collective operations at massive scale. Int J Parallel Emerg Distrib Syst 25(4):241\u2013258","journal-title":"Int J Parallel Emerg Distrib Syst"},{"key":"1707_CR41","doi-asserted-by":"crossref","unstructured":"Gropp W, Lusk E (1999) Reproducible measurements of mpi performance characteristics. In: Recent advances in parallel virtual machine and message passing interface. Springer, New York, pp 11\u201318","DOI":"10.1007\/3-540-48158-3_2"},{"issue":"1","key":"1707_CR42","first-page":"55","volume":"10","author":"R Reussner","year":"2002","unstructured":"Reussner R, Sanders P, Tr\u00e4ff JL, Skampi (2002) A comprehensive benchmark for public benchmarking of MPI. Sci Program 10(1):55\u201365","journal-title":"Sci Program"},{"issue":"2","key":"1707_CR43","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1007\/s11227-005-2340-2","volume":"34","author":"DA Grove","year":"2005","unstructured":"Grove DA, Coddington PD (2005) Communication benchmarking and performance modelling of mpi programs on cluster computers. J Supercomput 34(2):201\u2013217","journal-title":"J Supercomput"},{"key":"1707_CR44","doi-asserted-by":"crossref","unstructured":"Tr\u00e4ff JL (2012) Mpicroscope: towards an MPI benchmark tool for performance guideline verification. In: Recent advances in the message passing interface\u2014proceedings of 19th European MPI users\u2019 group meeting (EuroMPI\u201912), Austria, pp 100\u2013109. doi: 10.1007\/978-3-642-33518-1_15","DOI":"10.1007\/978-3-642-33518-1_15"},{"key":"1707_CR45","doi-asserted-by":"crossref","unstructured":"Hunold S, Carpen-Amarie A, Tr\u00e4ff JL (2014) Reproducible MPI micro-benchmarking isn\u2019t as easy as you think. In: Proceedings of the 21st European MPI users\u2019 group meeting. ACM, New York, p 69","DOI":"10.1145\/2642769.2642785"},{"key":"1707_CR46","unstructured":"NIST\/SEMATECH (2012) E-handbook of statistical methods. http:\/\/www.itl.nist.gov\/div898\/handbook\/ . Accessed June 2015"},{"key":"1707_CR47","doi-asserted-by":"crossref","unstructured":"Buranapanichkit D, Deligiannis N, Andreopoulos Y (2015) Convergence of desynchronization primitives in wireless sensor networks: stochastic modeling approach. CoRR. arXiv:1411.2862","DOI":"10.1109\/TSP.2014.2369003"},{"issue":"9","key":"1707_CR48","doi-asserted-by":"crossref","first-page":"3336","DOI":"10.1109\/TCOMM.2015.2455036","volume":"63","author":"N Deligiannis","year":"2015","unstructured":"Deligiannis N, Mota JF, Smart G, Andreopoulos Y (2015) Fast desynchronization for decentralized multichannel medium access control. IEEE Trans Commun 63(9):3336\u20133349","journal-title":"IEEE Trans Commun"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-016-1707-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-016-1707-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-016-1707-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,6]],"date-time":"2019-09-06T03:30:58Z","timestamp":1567740658000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-016-1707-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,4,4]]},"references-count":48,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2016,5]]}},"alternative-id":["1707"],"URL":"https:\/\/doi.org\/10.1007\/s11227-016-1707-x","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,4,4]]}}}