{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T04:10:48Z","timestamp":1748751048500,"version":"3.41.0"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319271392"},{"type":"electronic","value":"9783319271408"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-27140-8_6","type":"book-chapter","created":{"date-parts":[[2015,11,16]],"date-time":"2015-11-16T13:27:57Z","timestamp":1447680477000},"page":"74-87","source":"Crossref","is-referenced-by-count":0,"title":["A Data-Centric Tool to Improve the\u00a0Performance of Multithreaded Program on NUMA"],"prefix":"10.1007","author":[{"given":"Dan","family":"Zeng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liang","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaofei","family":"Liao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hai","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,12,16]]},"reference":[{"key":"6_CR1","unstructured":"Intel VTune Amplifier 2015. https:\/\/software.intel.com\/en-us\/intel-vtune-amplifier-xe"},{"key":"6_CR2","unstructured":"MPP (massively parallel processing). http:\/\/whatis.techtarget.com\/definition\/MPP-massively-parallel-processing"},{"key":"6_CR3","unstructured":"Symmetric multiprocessing. http:\/\/en.wikipedia.org\/wiki\/Symmetric_multiprocessing"},{"key":"6_CR4","unstructured":"Visual Performance Analyzer. http:\/\/www.alphaworks.ibm.com\/tech\/vpa"},{"issue":"9","key":"6_CR5","doi-asserted-by":"publisher","first-page":"836","DOI":"10.1109\/TC.1980.1675684","volume":"100","author":"KE Batcher","year":"1980","unstructured":"Batcher, K.E.: Design of a massively parallel processor. IEEE Trans. Comput. (TOC) 100(9), 836\u2013840 (1980)","journal-title":"IEEE Trans. Comput. (TOC)"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Bienia, C., Kumar, S., Singh, J.P., Li, K.: The PARSEC benchmark suite: Characterization and architectural implications. In: Proceedings of the 17th International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 72\u201381 (2008)","DOI":"10.1145\/1454115.1454128"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Blagodurov, S., Zhuravlev, S., Fedorova, A., Kamali, A.: A case for NUMA-aware contention management on multicore systems. In: Proceedings of the 19th International Conference on Parallel Architectures and Compilation Techniques(PACT), pp. 557\u2013558 (2010)","DOI":"10.1145\/1854273.1854350"},{"key":"6_CR8","unstructured":"Drongowski, P.J.: An introduction to analysis and optimization with AMD Code-Analyst Performance Analyzer. Advanced Micro Devices, Inc (2008)"},{"key":"6_CR9","unstructured":"Drongowski, P.J., Center, B.D.: Instruction-based sampling: A new performance analysis technique for AMD family 10h processors. Advanced Micro Devices, Inc (2007)"},{"key":"6_CR10","unstructured":"Intel: Intel 64 and IA-32 Architectures Software Developers Manual. Volume 3B: System Programming Guide (Part 2) (2013)"},{"key":"6_CR11","unstructured":"Jin, H.Q., Frumkin, M., Yan, J.: The OpenMP implementation of NAS parallel benchmarks and its performance (1999)"},{"key":"6_CR12","unstructured":"Kleen, A.: A NUMA API for Linux. Novel Inc (2005)"},{"key":"6_CR13","unstructured":"Lachaize, R., Lepers, B., Qu\u00e9ma, V.: MemProf: A memory profiler for NUMA multicore systems. In: Proceedings of the 2012 USENIX Conference on Annual Technical Conference (ATC), pp. 53\u201364 (2012)"},{"issue":"7","key":"6_CR14","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1145\/2508834.2513149","volume":"11","author":"C Lameter","year":"2013","unstructured":"Lameter, C.: NUMA(Non-Uniform Memory Access): An overview. ACM Queue 11(7), 40 (2013)","journal-title":"ACM Queue"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Majo, Z., Gross, T.R.: Matching memory access patterns and data placement for NUMA systems. In: Proceedings of the 10th International Symposium on Code Generation and Optimization (CGO), pp. 230\u2013241 (2012)","DOI":"10.1145\/2259016.2259046"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Majo, Z., Gross, T.R.: A library for portable and composable data locality optimizations for NUMA systems. In: Proceedings of the 20th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP), pp. 227\u2013238 (2015)","DOI":"10.1145\/2858788.2688509"},{"key":"6_CR17","unstructured":"Matz, M., Hubicka, J., Jaeger, A., Mitchell, M.: System V Application Binary Interface. AMD64 Architecture Processor Supplement, Draft v0 99 (2005)"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"McCurdy, C., Vetter, J.: Memphis: Finding and fixing NUMA-related performance problems on multi-core platforms. In: Proceedings of IEEE International Symposium on Performance Analysis of Systems & Software (ISPASS), pp. 87\u201396 (2010)","DOI":"10.1109\/ISPASS.2010.5452060"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Rao, J., Wang, K., Zhou, X., Xu, C.: Optimizing virtual machine scheduling in NUMA multicore systems. In: Proceedings of IEEE 19th International Symposium on High Performance Computer Architecture (HPCA), pp. 306\u2013317 (2013)","DOI":"10.1109\/HPCA.2013.6522328"},{"key":"6_CR20","doi-asserted-by":"crossref","unstructured":"Tam, D.K., Azimi, R., Stumm, M.: Thread clustering: sharing-aware scheduling on SMP-CMP-SMT multiprocessors. In: Proceedings of the 2007 ACM European Conference on Computer Systems (EuroSys), pp. 47\u201358 (2007)","DOI":"10.1145\/1272996.1273004"},{"issue":"3","key":"6_CR21","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1007\/BF02714571","volume":"44","author":"W Zheng","year":"2001","unstructured":"Zheng, W., Yang, B., Lin, W., Li, Z.: Task scheduling of parallel programs to optimize communications for cluster of SMPs. Sci. China Ser. Inf. Sci. 44(3), 213\u2013225 (2001)","journal-title":"Sci. China Ser. Inf. Sci."}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-27140-8_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T12:06:46Z","timestamp":1748693206000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-27140-8_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319271392","9783319271408"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-27140-8_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]}}}