{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T16:56:17Z","timestamp":1770224177445,"version":"3.49.0"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030050504","type":"print"},{"value":"9783030050511","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-05051-1_17","type":"book-chapter","created":{"date-parts":[[2018,12,6]],"date-time":"2018-12-06T16:38:48Z","timestamp":1544114328000},"page":"242-256","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["TAMM: A New Topology-Aware Mapping Method for Parallel Applications on the Tianhe-2A Supercomputer"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2931-4893","authenticated-orcid":false,"given":"Xinhai","family":"Chen","sequence":"first","affiliation":[]},{"given":"Jie","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Shengguo","family":"Li","sequence":"additional","affiliation":[]},{"given":"Peizhen","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Lihua","family":"Chi","sequence":"additional","affiliation":[]},{"given":"Qinglin","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,12,7]]},"reference":[{"key":"17_CR1","doi-asserted-by":"publisher","unstructured":"Bhatele, A., Laxmikant, V.: An evaluative study on the effect of contention on message latencies in large supercomputers. In: 2009 IEEE International Symposium on Parallel and Distributed Processing (IPDPS), pp. 1\u20138 (2009). \n                      https:\/\/doi.org\/10.1109\/IPDPS.2009.5161094","DOI":"10.1109\/IPDPS.2009.5161094"},{"key":"17_CR2","unstructured":"Bhatele, A.: Automating topology aware mapping for supercomputers. Ph.D. thesis, University of Illinois at Urbana-Champaign, Champaign, IL, USA (2010)"},{"key":"17_CR3","doi-asserted-by":"publisher","first-page":"372","DOI":"10.1016\/j.compfluid.2012.01.019","volume":"80","author":"B Brandfass","year":"2013","unstructured":"Brandfass, B., Alrutz, T., Gerhold, T.: Rank reordering for mpi communication optimization. Comput. Fluids 80, 372\u2013380 (2013). \n                      https:\/\/doi.org\/10.1016\/j.compfluid.2012.01.019","journal-title":"Comput. Fluids"},{"key":"17_CR4","doi-asserted-by":"publisher","unstructured":"Cao, J., Xiao, L., Pang, Z., Wang, K., Xu, J.: The efficient in-band management for interconnect network in Tianhe-2 system. In: 2016 24th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing (PDP), pp. 18\u201326 (2016). \n                      https:\/\/doi.org\/10.1109\/PDP.2016.58","DOI":"10.1109\/PDP.2016.58"},{"key":"17_CR5","doi-asserted-by":"publisher","unstructured":"Chen, H., Chen, W., Huang, J., Robert, B., Kuhn, H.: MPIPP: an automatic profile-guided parallel process placement toolset for SMP clusters and multiclusters. In: Proceedings of the 20th Annual International Conference on Supercomputing, ICS 2006, pp. 353\u2013360. ACM (2006). \n                      https:\/\/doi.org\/10.1145\/1183401.1183451","DOI":"10.1145\/1183401.1183451"},{"key":"17_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1007\/978-3-642-29737-3_34","volume-title":"Euro-Par 2011: Parallel Processing Workshops","author":"IS Duff","year":"2012","unstructured":"Duff, I.S.: European exascale software initiative: numerical libraries, solvers and algorithms. In: Alexander, M., et al. (eds.) Euro-Par 2011. LNCS, vol. 7155, pp. 295\u2013304. Springer, Heidelberg (2012). \n                      https:\/\/doi.org\/10.1007\/978-3-642-29737-3_34"},{"key":"17_CR7","doi-asserted-by":"publisher","unstructured":"Ercal, F., Ramanujam, J., Sadayappan, P.: Task allocation onto a hypercube by recursive mincut bipartitioning. In: Proceedings of the Third Conference on Hypercube Concurrent Computers and Applications: Architecture, Software, Computer Systems, and General Issues, C3P, vol. 1, pp. 210\u2013221. ACM (1988). \n                      https:\/\/doi.org\/10.1145\/62297.62323","DOI":"10.1145\/62297.62323"},{"key":"17_CR8","doi-asserted-by":"crossref","unstructured":"Fujiwara, T., Malakar, P., Reda, K., Vishwanath, V., Papka, M.E., Ma, K.L.: A visual analytics system for optimizing communications in massively parallel applications. In: IEEE Conference on Visual Analytics Science and Technology (2017)","DOI":"10.1109\/VAST.2017.8585646"},{"key":"17_CR9","doi-asserted-by":"publisher","unstructured":"Galvez, J.J., Jain, N., Kale, L.V.: Automatic topology mapping of diverse large-scale parallel applications. In: Proceedings of the International Conference on Supercomputing, ICS 2017, pp. 17:1\u201317:10. ACM (2017). \n                      https:\/\/doi.org\/10.1145\/3079079.3079104","DOI":"10.1145\/3079079.3079104"},{"issue":"4","key":"17_CR10","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1177\/1094342009347766","volume":"23","author":"A Geist","year":"2009","unstructured":"Geist, A., Dosanjh, S.: IESP exascale challenge: co-design of architectures and algorithms. Int. J. High Perform. Comput. Appl. 23(4), 401\u2013402 (2009). \n                      https:\/\/doi.org\/10.1177\/1094342009347766","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"17_CR11","doi-asserted-by":"publisher","unstructured":"Georgiou, Y., Jeannot, E., Mercier, G., Villiermet, A.: Topology-aware job mapping. Int. J. High Perform. Comput. Appl. 63 (2017). \n                      https:\/\/doi.org\/10.1109\/SC.2006.63","DOI":"10.1109\/SC.2006.63"},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Hendrickson, B., Leland, R.: The Chaco user\u2019s guide: version 2.0. Technical report, Sandia National Laboratory (1994)","DOI":"10.2172\/10106339"},{"key":"17_CR13","doi-asserted-by":"publisher","unstructured":"Hoefler, T., Jeannot, E., Mercier, G.: An overview of topology mapping algorithms and techniques in high-performance computing, Chap. 5, pp. 73\u201394. Wiley-Blackwell (2014).\n                      https:\/\/doi.org\/10.1002\/9781118711897.ch5","DOI":"10.1002\/9781118711897.ch5"},{"key":"17_CR14","doi-asserted-by":"publisher","unstructured":"Hoefler, T., Snir, M.: Generic topology mapping strategies for large-scale parallel architectures. In: Proceedings of the International Conference on Supercomputing, ICS 2011. pp. 75\u201384. ACM(2011). \n                      https:\/\/doi.org\/10.1145\/1995896.1995909","DOI":"10.1145\/1995896.1995909"},{"issue":"4","key":"17_CR15","doi-asserted-by":"publisher","first-page":"993","DOI":"10.1109\/TPDS.2013.104","volume":"25","author":"E Jeannot","year":"2014","unstructured":"Jeannot, E., Mercier, G., Tessier, F.: Process placement in multicore clusters:algorithmic issues and practical techniques. IEEE Trans. Parallel Distrib. Syst. 25(4), 993\u20131002 (2014). \n                      https:\/\/doi.org\/10.1109\/TPDS.2013.104","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"17_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1007\/978-3-642-15291-7_20","volume-title":"Euro-Par 2010 - Parallel Processing","author":"E Jeannot","year":"2010","unstructured":"Jeannot, E., Mercier, G.: Near-optimal placement of MPI processes on hierarchical NUMA architectures. In: D\u2019Ambra, P., Guarracino, M., Talia, D. (eds.) Euro-Par 2010. LNCS, vol. 6272, pp. 199\u2013210. Springer, Heidelberg (2010). \n                      https:\/\/doi.org\/10.1007\/978-3-642-15291-7_20"},{"key":"17_CR17","unstructured":"Karypis, G., Kumar, V.: Metis: a software package for partitioning unstructured graphs. International Cryogenics Monograph, pp. 121\u2013124 (1998)"},{"key":"17_CR18","doi-asserted-by":"publisher","unstructured":"Li, S., Hoefler, T., Snir, M.: NUMA-aware shared-memory collective communication for MPI. In: Proceedings of the 22nd International Symposium on High-Performance Parallel and Distributed Computing, HPDC 2013, pp. 85\u201396. ACM (2013). \n                      https:\/\/doi.org\/10.1145\/2462902.2462903","DOI":"10.1145\/2462902.2462903"},{"issue":"2","key":"17_CR19","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1007\/s11390-015-1520-7","volume":"30","author":"XK Liao","year":"2015","unstructured":"Liao, X.K., et al.: High performance interconnect network for Tianhe system. J. Comput. Sci. Technol. 30(2), 259\u2013272 (2015). \n                      https:\/\/doi.org\/10.1007\/s11390-015-1520-7","journal-title":"J. Comput. Sci. Technol."},{"issue":"3","key":"17_CR20","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/s11704-014-3501-3","volume":"8","author":"X Liao","year":"2014","unstructured":"Liao, X., Xiao, L., Yang, C., Lu, Y.: Milkyway-2 supercomputer: system and application. Front. Comput. Sci. 8(3), 345\u2013356 (2014). \n                      https:\/\/doi.org\/10.1007\/s11704-014-3501-3","journal-title":"Front. Comput. Sci."},{"key":"17_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1007\/978-3-642-03770-2_17","volume-title":"Recent Advances in Parallel Virtual Machine and Message Passing Interface","author":"G Mercier","year":"2009","unstructured":"Mercier, G., Clet-Ortega, J.: Towards an efficient process placement policy for MPI applications in multicore environments. In: Ropo, M., Westerholm, J., Dongarra, J. (eds.) EuroPVM\/MPI 2009. LNCS, vol. 5759, pp. 104\u2013115. Springer, Heidelberg (2009). \n                      https:\/\/doi.org\/10.1007\/978-3-642-03770-2_17"},{"key":"17_CR22","doi-asserted-by":"publisher","unstructured":"Mirsadeghi, S.H., Afsahi, A.: PTRAM: a parallel topology-and routing-aware mapping framework for large-scale HPC systems. In: 2016 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 386\u2013396 (2016). \n                      https:\/\/doi.org\/10.1109\/IPDPSW.2016.146","DOI":"10.1109\/IPDPSW.2016.146"},{"key":"17_CR23","doi-asserted-by":"publisher","unstructured":"Mirsadeghi, S.H., Afsahi, A.: Topology-aware rank reordering for MPI collectives. In: 2016 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 1759\u20131768 (2016). \n                      https:\/\/doi.org\/10.1109\/IPDPSW.2016.139","DOI":"10.1109\/IPDPSW.2016.139"},{"issue":"3","key":"17_CR24","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1007\/s11704-014-3500-9","volume":"8","author":"Z Pang","year":"2014","unstructured":"Pang, Z., et al.: The TH express high performance interconnect networks. Front. Comput. Sci. 8(3), 357\u2013366 (2014). \n                      https:\/\/doi.org\/10.1007\/s11704-014-3500-9","journal-title":"Front. Comput. Sci."},{"key":"17_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"493","DOI":"10.1007\/3-540-61142-8_588","volume-title":"High-Performance Computing and Networking","author":"F Pellegrini","year":"1996","unstructured":"Pellegrini, F., Roman, J.: Scotch: a software package for static mapping by dual recursive bipartitioning of process and architecture graphs. In: Liddell, H., Colbrook, A., Hertzberger, B., Sloot, P. (eds.) HPCN-Europe 1996. LNCS, vol. 1067, pp. 493\u2013498. Springer, Heidelberg (1996). \n                      https:\/\/doi.org\/10.1007\/3-540-61142-8_588"},{"key":"17_CR26","doi-asserted-by":"publisher","unstructured":"Rodrigues, E.R., Madruga, F.L., Navaux, P.O.A., Panetta, J.: Multi-core aware process mapping and its impact on communication overhead of parallel applications. In: 2009 IEEE Symposium on Computers and Communications, pp. 811\u2013817 (2009). \n                      https:\/\/doi.org\/10.1109\/ISCC.2009.5202271","DOI":"10.1109\/ISCC.2009.5202271"},{"key":"17_CR27","doi-asserted-by":"publisher","unstructured":"Schreiber, R.S., et al.: The NAS parallel benchmarks. In: 1991 ACM\/IEEE Conference on Supercomputing (Supercomputing 1991) (SC), pp. 158\u2013165 (1991). \n                      https:\/\/doi.org\/10.1145\/125826.125925","DOI":"10.1145\/125826.125925"},{"key":"17_CR28","doi-asserted-by":"publisher","unstructured":"Sreepathi, S., D\u2019Azevedo, E., Philip, B., Worley, P.: Communication characterization and optimization of applications using topology-aware task mapping on large supercomputers. In: Proceedings of the 7th ACM\/SPEC on International Conference on Performance Engineering, ICPE 2016, pp. 225\u2013236. ACM (2016). \n                      https:\/\/doi.org\/10.1145\/2851553.2851575","DOI":"10.1145\/2851553.2851575"},{"key":"17_CR29","doi-asserted-by":"publisher","unstructured":"Subramoni, H., et al.: Design of network topology aware scheduling services for large infiniband clusters. In: 2013 IEEE International Conference on Cluster Computing (CLUSTER), pp. 1\u20138 (2013). \n                      https:\/\/doi.org\/10.1109\/CLUSTER.2013.6702677","DOI":"10.1109\/CLUSTER.2013.6702677"},{"key":"17_CR30","unstructured":"Sweep3D: The ASCI Sweep3D Benchmark Code (2014). \n                      http:\/\/www.llnl.gov\/asci-benchmarks\/scsi\/limited\/sweep3d\/asci_sweep3d.html\n                      \n                     (2014)"},{"key":"17_CR31","doi-asserted-by":"publisher","unstructured":"Tuncer, O., Leung, V.J., Coskun, A.K.: PaCMap: topology mapping of unstructured communication patterns onto non-contiguous allocations. In: Proceedings of the 29th ACM on International Conference on Supercomputing, ICS 2015, pp. 37\u201346. ACM (2015).\n                      https:\/\/doi.org\/10.1145\/2751205.2751225","DOI":"10.1145\/2751205.2751225"},{"key":"17_CR32","unstructured":"Walshaw, C., Cross, M.: Jostle: Parallel multilevel graph-partitioning software - an overview. Mesh Partitioning Techniques and Domain Decomposition Techniques (2007)"},{"key":"17_CR33","first-page":"1044","volume":"38","author":"T Wang","year":"2015","unstructured":"Wang, T., Qing, P., Wei, D., Qi, F.B.: Optimization of process-to-core mapping based on clustering analysis. Chin. J. Comput. 38, 1044\u20131055 (2015)","journal-title":"Chin. J. Comput."},{"issue":"4","key":"17_CR34","doi-asserted-by":"publisher","first-page":"1691","DOI":"10.1007\/s11227-016-1876-7","volume":"73","author":"J Wu","year":"2017","unstructured":"Wu, J., Xiong, X., Berrocal, E., Wang, J., Lan, Z.: Topology mapping of irregular parallel applications on torus-connected supercomputers. J. Supercomput. 73(4), 1691\u20131714 (2017). \n                      https:\/\/doi.org\/10.1007\/s11227-016-1876-7","journal-title":"J. Supercomput."},{"key":"17_CR35","doi-asserted-by":"publisher","unstructured":"Yu, H., Chung, I.H., Moreira, J.: Topology mapping for blue Gene\/L supercomputer. In: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, SC 2006. ACM (2006). \n                      https:\/\/doi.org\/10.1145\/1188455.1188576","DOI":"10.1145\/1188455.1188576"},{"key":"17_CR36","unstructured":"Zerr, R.J., Baker, R.S.: SNAP: SN (discrete ordinates) application proxy - proxy description. Technical report, Los Alamos National Laboratory (2013)"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-05051-1_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,20]],"date-time":"2019-05-20T05:18:33Z","timestamp":1558329513000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-05051-1_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030050504","9783030050511"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-05051-1_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"7 December 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 November 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 November 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/nsclab.org\/ica3pp2018\/authors.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"407","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"141","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"50","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"35% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"2.3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"7.3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}}]}}