{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T06:06:34Z","timestamp":1747893994312},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030206550"},{"type":"electronic","value":"9783030206567"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-20656-7_11","type":"book-chapter","created":{"date-parts":[[2019,6,4]],"date-time":"2019-06-04T19:02:40Z","timestamp":1559674960000},"page":"209-226","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["PerfMemPlus: A Tool for Automatic Discovery of Memory Performance Problems"],"prefix":"10.1007","author":[{"given":"Christian","family":"Helm","sequence":"first","affiliation":[]},{"given":"Kenjiro","family":"Taura","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,5,17]]},"reference":[{"key":"11_CR1","doi-asserted-by":"crossref","unstructured":"Bhadauria, M., Weaver, V.M., Mckee, S.A.: Understanding parsec performance on contemporary CMPS. In: Proceedings of the 2009 IEEE International Symposium on Workload Characterization, IISWC 2009, pp. 98\u2013107 (2009)","DOI":"10.1109\/IISWC.2009.5306793"},{"key":"11_CR2","unstructured":"Bienia, C.: Benchmarking Modern Multiprocessors. Ph.D. thesis, Princeton University (2011)"},{"key":"11_CR3","unstructured":"Bingmann, T.: Parallel Memory Bandwidth Benchmark (2013). \n                      https:\/\/panthema.net\/2013\/pmbw\/"},{"key":"11_CR4","doi-asserted-by":"crossref","unstructured":"Chabbi, M., Wen, S., Liu, X.: Featherlight on-the-fly false-sharing detection. In: Proceedings of the 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 152\u2013167 (2018)","DOI":"10.1145\/3200691.3178499"},{"key":"11_CR5","unstructured":"Drebes, A., Pop, A., Heydemann, K., Cohen, A., Drachtemam, N.: Aftermath: a graphical tool for performance analysis and debugging of fine-grained task-parallel programs and run-time systems. In: 7th Workshop on Programmability Issues for Heterogeneous Multicores (2014)"},{"key":"11_CR6","doi-asserted-by":"crossref","unstructured":"Eklov, D., Nikoleris, N., Hagersten, E.: A software based profiling method for obtaining speedup stacks on commodity multi-cores. In: ISPASS 2014 - IEEE International Symposium on Performance Analysis of Systems and Software (2014)","DOI":"10.1109\/ISPASS.2014.6844479"},{"key":"11_CR7","doi-asserted-by":"crossref","unstructured":"Eyerman, S., Du Bois, K., Eeckhout, L.: Speedup stacks: identifying scaling bottlenecks in multi-threaded applications. In: ISPASS 2012 - IEEE International Symposium on Performance Analysis of Systems and Software, pp. 145\u2013155 (2012)","DOI":"10.1109\/ISPASS.2012.6189221"},{"issue":"7","key":"11_CR8","doi-asserted-by":"publisher","first-page":"2180","DOI":"10.1109\/TVCG.2017.2718532","volume":"24","author":"Alfredo Gimenez","year":"2018","unstructured":"Gimenez, A., et al.: MemAxes: visualization and analytics for characterizing complex memory performance behaviors. IEEE Trans. Vis. Comput. Graph. 27(5), 2180\u20132193 (2017)","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"11_CR9","doi-asserted-by":"crossref","unstructured":"Gim\u00e9nez, A., et al.: Dissecting on-node memory access performance: a semantic approach. In: International Conference for High Performance Computing, Networking, Storage and Analysis, SC, pp. 166\u2013176 (2014)","DOI":"10.1109\/SC.2014.19"},{"key":"11_CR10","unstructured":"GNU: gprof (2018). \n                      https:\/\/sourceware.org\/binutils\/docs\/gprof\/"},{"key":"11_CR11","unstructured":"Guennebaud, G., Jacob, B., et al.: Eigen v3 (2010). \n                      http:\/\/eigen.tuxfamily.org"},{"key":"11_CR12","doi-asserted-by":"crossref","unstructured":"Huynh, A., Taura, K.: Delay Spotter: a tool for spotting scheduler-caused delays in task parallel runtime systems. In: IEEE International Conference on Cluster Computing, ICCC, pp. 114\u2013125 (2017)","DOI":"10.1109\/CLUSTER.2017.82"},{"key":"11_CR13","unstructured":"Intel Corporation: Avoiding and identifying false sharing among threads (2012). \n                      https:\/\/software.intel.com\/en-us\/articles\/avoiding-and-identifying-false-sharing-among-threads"},{"key":"11_CR14","unstructured":"Intel Corporation: Finding your memory access performance bottlenecks (2016). \n                      https:\/\/software.intel.com\/en-us\/articles\/finding-your-memory-access-performance-bottlenecks"},{"key":"11_CR15","doi-asserted-by":"crossref","unstructured":"Jayasena, S., et al.: Detection of false sharing using machine learning. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis - SC 2013 (2013)","DOI":"10.1145\/2503210.2503269"},{"key":"11_CR16","unstructured":"Lachaize, R., Lepers, B., Qu\u00e9ma, V.: MemProf: a memory profiler for NUMA multicore systems. In: Proceedings of the 2012 USENIX Conference on Annual Technical Conference, p. 5 (2012)"},{"key":"11_CR17","unstructured":"LeCun, Y., Cortes, C., Burges, C.: The Mnist Database of Handwritten Digits (2016). \n                      http:\/\/yann.lecun.com\/exdb\/mnist\/"},{"key":"11_CR18","doi-asserted-by":"crossref","unstructured":"Liu, T., Berger, E.D.: SHERIFF: precise detection and automatic mitigation of false sharing. In: Proceedings of the 2011 ACM International Conference on Object Oriented Programming Systems Languages and Applications, pp. 3\u201318 (2011)","DOI":"10.1145\/2048066.2048070"},{"key":"11_CR19","doi-asserted-by":"crossref","unstructured":"Liu, T., Liu, X.: Cheetah: detecting false sharing efficiently and effectively. In: Proceedings of the International Symposium on Code Generation and Optimization (2016)","DOI":"10.1145\/2854038.2854039"},{"key":"11_CR20","doi-asserted-by":"crossref","unstructured":"Liu, T., Tian, C., Hu, Z., Berger, E.D.: PREDATOR: predictive false sharing detection. In: Proceedings of the 19th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (2014)","DOI":"10.1145\/2555243.2555244"},{"key":"11_CR21","doi-asserted-by":"crossref","unstructured":"Liu, X., Mellor-Crummey, J.: A data-centric profiler for parallel programs. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis - SC 2013 (2013)","DOI":"10.1145\/2503210.2503297"},{"key":"11_CR22","doi-asserted-by":"crossref","unstructured":"Liu, X., Mellor-Crummey, J.: A tool to analyze the performance of multithreaded programs on NUMA architectures. In: ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP), pp. 259\u2013272 (2014)","DOI":"10.1145\/2692916.2555271"},{"key":"11_CR23","doi-asserted-by":"crossref","unstructured":"Liu, X., Sharma, K., Mellor-Crummey, J.: ArrayTool: a lightweight profiler to guide array regrouping. In: Proceedings of the 23rd International Conference on Parallel Architectures and Compilation, pp. 405\u2013416 (2014)","DOI":"10.1145\/2628071.2628102"},{"key":"11_CR24","doi-asserted-by":"crossref","unstructured":"Liu, X., Wu, B.: ScaAnalyzer: a tool to identify memory scalability bottlenecks in parallel programs. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis - SC 2015 (2015)","DOI":"10.1145\/2807591.2807648"},{"key":"11_CR25","doi-asserted-by":"crossref","unstructured":"Majo, Z., Gross, T.R.: (Mis) Understanding the NUMA memory system performance of multithreaded workloads. In: IEEE International Symposium on Workload Characterization (IISWC), pp. 11\u201322 (2013)","DOI":"10.1109\/IISWC.2013.6704666"},{"key":"11_CR26","unstructured":"Mario, J.: C2C - False Sharing Detection in Linux Perf (2016). \n                      https:\/\/joemario.github.io\/blog\/2016\/09\/01\/c2c-blog"},{"key":"11_CR27","unstructured":"McCalpin, J.D.: STREAM benchmark (1995). \n                      http:\/\/www.cs.virginia.edu\/stream\/"},{"key":"11_CR28","unstructured":"Paratools: Threadspotter (2018). \n                      http:\/\/threadspotter.paratools.com"},{"key":"11_CR29","doi-asserted-by":"crossref","unstructured":"Pesterev, A., Zeldovich, N., Morris, R.T., Orlando, T.P.: Locating cache performance bottlenecks using data profiling. In: Proceedings of the 5th European Conference on Computer Systems EuroSys 2010, p. 335 (2010)","DOI":"10.1145\/1755913.1755947"},{"key":"11_CR30","doi-asserted-by":"crossref","unstructured":"Qiao, Y., et al.: Parallelizing and optimizing neural Encoder Decoder models without padding on multi-core architecture. Future Gener. Comput. Syst. (2018)","DOI":"10.1016\/j.future.2018.04.070"},{"key":"11_CR31","doi-asserted-by":"crossref","unstructured":"Roth, M., Best, M.J., Mustard, C., Fedorova, A.: Deconstructing the overhead in parallel applications. In: Proceedings - 2012 IEEE International Symposium on Workload Characterization, IISWC 2012 1, pp. 59\u201368 (2012)","DOI":"10.1109\/IISWC.2012.6402901"},{"key":"11_CR32","unstructured":"Southern, G., Renau, J.: Deconstructing PARSEC scalability. In: 11th Annual Workshop on Duplicating, Deconstructing and Debunking, p. 10 (2015)"},{"key":"11_CR33","unstructured":"Taura, K.: Mnist application (2016). \n                      https:\/\/www.eidos.ic.i.u-tokyo.ac.jp\/~tau\/lecture\/paralleldistributed\/2016\/examples\/18mnist\/"},{"key":"11_CR34","unstructured":"Viswanathan, V., Kumar, K., Willhalm, T., Lu, P., Filipiak, B., Sakthivelu, S.: Intel memory latency checker (2018). \n                      https:\/\/software.intel.com\/en-us\/articles\/intelr-memory-latency-checker"},{"key":"11_CR35","doi-asserted-by":"crossref","unstructured":"Xu, H., Wen, S., Gimenez, A., Gamblin, T., Liu, X.: DR-BW: identifying bandwidth contention in NUMA architectures with supervised learning. In: IEEE International Parallel and Distributed Processing Symposium, IPDPS (2017)","DOI":"10.1109\/IPDPS.2017.97"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-20656-7_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,4]],"date-time":"2019-06-04T19:18:23Z","timestamp":1559675903000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-20656-7_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030206550","9783030206567"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-20656-7_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"17 May 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISC High Performance","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on High Performance Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Frankfurt","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 June 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 June 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"34","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"supercomputing2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.isc-hpc.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Linklings","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"70","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"17","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"4-5","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"n\/a","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}}]}}