{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,2]],"date-time":"2025-04-02T19:10:16Z","timestamp":1743621016414,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":23,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819642069","type":"print"},{"value":"9789819642076","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-4207-6_40","type":"book-chapter","created":{"date-parts":[[2025,4,2]],"date-time":"2025-04-02T18:39:56Z","timestamp":1743619196000},"page":"441-453","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["NAAM: Enhancing Automatic Task Mapping Efficiency on\u00a0NUMA Machines"],"prefix":"10.1007","author":[{"given":"Yifei","family":"Yang","sequence":"first","affiliation":[]},{"given":"Tianyufei","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Linchang","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Chengrun","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Xuezheng","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Miao","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Di","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,1]]},"reference":[{"key":"40_CR1","doi-asserted-by":"publisher","first-page":"103023","DOI":"10.1016\/j.parco.2023.103023","volume":"116","author":"C Foyer","year":"2023","unstructured":"Foyer, C., Goglin, B., Rubio Proa\u00f1o, A.: A survey of software techniques to emulate heterogeneous memory systems in high-performance computing. Parallel Comput. 116, 103023 (2023). https:\/\/doi.org\/10.1016\/j.parco.2023.103023","journal-title":"Parallel Comput."},{"key":"40_CR2","doi-asserted-by":"publisher","unstructured":"Bauer, M., Treichler, S., Slaughter, E., Aiken, A.: Legion: expressing locality and independence with logical regions. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis (SC), pp. 1\u201311 (2012). https:\/\/doi.org\/10.1109\/SC.2012.71","DOI":"10.1109\/SC.2012.71"},{"key":"40_CR3","doi-asserted-by":"crossref","unstructured":"Bosilca, G., Bouteiller, A., Danalis, A., Herault, T., Lemariner, P., Dongarra, J.: DAGuE: a generic distributed DAG engine for high performance computing, pp. 1151\u20131158. IEEE, Anchorage (2011)","DOI":"10.1109\/IPDPS.2011.281"},{"key":"40_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"863","DOI":"10.1007\/978-3-642-03869-3_80","volume-title":"Euro-Par 2009 Parallel Processing","author":"C Augonnet","year":"2009","unstructured":"Augonnet, C., Thibault, S., Namyst, R., Wacrenier, P.-A.: StarPU: a unified platform for task scheduling on heterogeneous multicore architectures. In: Sips, H., Epema, D., Lin, H.-X. (eds.) Euro-Par 2009. LNCS, vol. 5704, pp. 863\u2013874. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-03869-3_80"},{"key":"40_CR5","unstructured":"Jia, Z., Zaharia, M., Aiken, A.: Beyond data and model parallelism for deep neural networks. In: Talwalkar, A., Smith, V., Zaharia, M. (eds.) Proceedings of Machine Learning and Systems, vol.\u00a01, pp. 1\u201313 (2019)"},{"key":"40_CR6","unstructured":"Aiken, A., Bauer, M.: Programming with Legion (2023). https:\/\/legion.stanford.edu\/pdfs\/legion-manual.pdf"},{"key":"40_CR7","doi-asserted-by":"publisher","unstructured":"S.\u00a0F. X.\u00a0Teixeira, T., Henzinger, A., Yadav, R., Aiken, A.: Automated mapping of task-based programs onto distributed and heterogeneous machines. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC). Association for Computing Machinery, New York (2023). https:\/\/doi.org\/10.1145\/3581784.3607079","DOI":"10.1145\/3581784.3607079"},{"key":"40_CR8","doi-asserted-by":"publisher","unstructured":"Brink, S., et al.: Thicket: seeing the performance experiment forest for the individual run trees. In: Proceedings of the 32nd International Symposium on High-Performance Parallel and Distributed Computing (HPDC), pp. 281\u2013293. Association for Computing Machinery, New York (2023). https:\/\/doi.org\/10.1145\/3588195.3592989","DOI":"10.1145\/3588195.3592989"},{"key":"40_CR9","doi-asserted-by":"publisher","unstructured":"Kanakagiri, R., Solomonik, E.: Minimum cost loop nests for contraction of a sparse tensor with a tensor network. In: Proceedings of the 36th ACM Symposium on Parallelism in Algorithms and Architectures (SPAA), pp. 169\u2013181. Association for Computing Machinery, New York (2024). https:\/\/doi.org\/10.1145\/3626183.3659985","DOI":"10.1145\/3626183.3659985"},{"key":"40_CR10","doi-asserted-by":"publisher","unstructured":"Bundy, A., Wallen, L.: Breadth-first search. Catalogue Artif. Intell. Tools 13 (1984). https:\/\/doi.org\/10.1007\/978-3-642-96868-6_25","DOI":"10.1007\/978-3-642-96868-6_25"},{"key":"40_CR11","doi-asserted-by":"publisher","unstructured":"Ansel, J., et al.: OpenTuner: an extensible framework for program autotuning. In: Proceedings of the 23rd International Conference on Parallel Architectures and Compilation (PACT), pp. 303\u2013316. Association for Computing Machinery, New York (2014). https:\/\/doi.org\/10.1145\/2628071.2628092","DOI":"10.1145\/2628071.2628092"},{"key":"40_CR12","doi-asserted-by":"crossref","unstructured":"Ren, M., Park, J.Y., Houston, M., Aiken, A., Dally, W.J.: A tuning framework for software-managed memory hierarchies. In: 2008 International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 280\u2013291 (2008)","DOI":"10.1145\/1454115.1454155"},{"key":"40_CR13","doi-asserted-by":"publisher","unstructured":"Xu, X.J., Xiao, C.B., Tian, G.Z., Sun, T.: Hybrid scheduling deadline-constrained multi-DAGs based on reverse HEFT. In: International Conference on Information System and Artificial Intelligence (ISAI), pp. 196\u2013202 (2016). https:\/\/doi.org\/10.1109\/ISAI.2016.0050","DOI":"10.1109\/ISAI.2016.0050"},{"key":"40_CR14","doi-asserted-by":"publisher","unstructured":"R\u0103dulescu, A., van Gemund, A.J.C.: On the complexity of list scheduling algorithms for distributed-memory systems. In: Proceedings of the 13th International Conference on Supercomputing (ICS), pp. 68\u201375. Association for Computing Machinery, New York (1999). https:\/\/doi.org\/10.1145\/305138.305162","DOI":"10.1145\/305138.305162"},{"key":"40_CR15","doi-asserted-by":"publisher","unstructured":"PriyaDarshini, V.N., Sankari, P.S., Chitra, P., Venkatesh: Reliable task scheduling for heterogeneous distributed computing environment. In: International Conference on Advances in Computing, Control, and Telecommunication Technologies, pp. 494\u2013496 (2009). https:\/\/doi.org\/10.1109\/ACT.2009.127","DOI":"10.1109\/ACT.2009.127"},{"key":"40_CR16","unstructured":"Wang, Y., et al.: INSPIRIT: optimizing heterogeneous task scheduling through adaptive priority in task-based runtime systems (2024). https:\/\/arxiv.org\/abs\/2404.03226"},{"key":"40_CR17","doi-asserted-by":"publisher","unstructured":"Ganguly, D., Zhang, Z., Yang, J., Melhem, R.: Interplay between hardware prefetcher and page eviction policy in CPU-GPU unified virtual memory. In: Proceedings of the 46th International Symposium on Computer Architecture (ISCA), pp. 224\u2013235. Association for Computing Machinery, New York (2019). https:\/\/doi.org\/10.1145\/3307650.3322224","DOI":"10.1145\/3307650.3322224"},{"key":"40_CR18","doi-asserted-by":"publisher","unstructured":"Chang, C.H., Kumar, A., Sivasubramaniam, A.: To move or not to move? Page migration for irregular applications in over-subscribed GPU memory systems with DynaMap. In: Proceedings of the 14th ACM International Conference on Systems and Storage (SYSTOR). Association for Computing Machinery, New York (2021). https:\/\/doi.org\/10.1145\/3456727.3463766","DOI":"10.1145\/3456727.3463766"},{"key":"40_CR19","doi-asserted-by":"publisher","unstructured":"Muthukrishnan, H., Lustig, D., Villa, O., Wenisch, T., Nellans, D.: FinePack: transparently improving the efficiency of fine-grained transfers in multi-GPU systems. In: 2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA), pp. 516\u2013529 (2023). https:\/\/doi.org\/10.1109\/HPCA56546.2023.10070949","DOI":"10.1109\/HPCA56546.2023.10070949"},{"key":"40_CR20","unstructured":"Lepers, B., Quema, V., Fedorova, A.: Thread and memory placement on NUMA systems: asymmetry matters. In: 2015 USENIX Annual Technical Conference (USENIX ATC 2015), pp. 277\u2013289. USENIX Association, Santa Clara (2015). https:\/\/www.usenix.org\/conference\/atc15\/technical-session\/presentation\/lepers"},{"key":"40_CR21","doi-asserted-by":"publisher","unstructured":"Wang, Y., Li, B., Jaleel, A., Yang, J., Tang, X.: GRIT: enhancing multi-GPU performance with fine-grained dynamic page placement. In: 2024 IEEE International Symposium on High-Performance Computer Architecture (HPCA), pp. 1080\u20131094 (2024). https:\/\/doi.org\/10.1109\/HPCA57654.2024.00085","DOI":"10.1109\/HPCA57654.2024.00085"},{"issue":"8","key":"40_CR22","doi-asserted-by":"publisher","first-page":"1802","DOI":"10.14778\/3659437.3659439","volume":"17","author":"M Kim","year":"2024","unstructured":"Kim, M., Hwang, J., Heo, G., Cho, S., Mahajan, D., Park, J.: Accelerating string-key learned index structures via memoization-based incremental training. Proc. VLDB Endow. 17(8), 1802\u20131815 (2024). https:\/\/doi.org\/10.14778\/3659437.3659439","journal-title":"Proc. VLDB Endow."},{"key":"40_CR23","doi-asserted-by":"publisher","unstructured":"Roy, R.B., Patel, T., Gadepally, V., Tiwari, D.: BLISS: auto-tuning complex applications using a pool of diverse lightweight learning models. In: Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation (PLDI), pp. 1280\u20131295. Association for Computing Machinery, New York (2021). https:\/\/doi.org\/10.1145\/3453483.3454109","DOI":"10.1145\/3453483.3454109"}],"container-title":["Lecture Notes in Computer Science","Parallel and Distributed Computing, Applications and Technologies"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-4207-6_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,2]],"date-time":"2025-04-02T18:40:10Z","timestamp":1743619210000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-4207-6_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819642069","9789819642076"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-4207-6_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"1 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PDCAT","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Parallel and Distributed Computing: Applications and Technologies","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hong Kong","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pdcat2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/hpcc.siat.ac.cn\/meeting\/pdcat2024\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}