{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T01:05:30Z","timestamp":1773277530834,"version":"3.50.1"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030993719","type":"print"},{"value":"9783030993726","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-99372-6_5","type":"book-chapter","created":{"date-parts":[[2022,3,23]],"date-time":"2022-03-23T18:03:23Z","timestamp":1648058603000},"page":"67-82","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["LC-MEMENTO: A Memory Model for Accelerated Architectures"],"prefix":"10.1007","author":[{"given":"Kiran","family":"Ranganath","sequence":"first","affiliation":[]},{"given":"Jesun","family":"Firoz","sequence":"additional","affiliation":[]},{"given":"Joshua","family":"Suetterlein","sequence":"additional","affiliation":[]},{"given":"Joseph","family":"Manzano","sequence":"additional","affiliation":[]},{"given":"Andres","family":"Marquez","sequence":"additional","affiliation":[]},{"given":"Mark","family":"Raugas","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Wong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,3,24]]},"reference":[{"key":"5_CR1","doi-asserted-by":"crossref","unstructured":"Abdolrashidi, A., et al.: WIREFRAME: supporting data-dependent parallelism through dependency graph execution in GPUs. In: Proceedings of the 50th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 600\u2013611 (2017)","DOI":"10.1145\/3123939.3123976"},{"key":"5_CR2","doi-asserted-by":"crossref","unstructured":"Abdolrashidi, A., et al.: BlockMaestro: enabling programmer-transparent task-based execution in GPU systems. In: 2021 48th Annual IEEE\/ACM International Symposium on Computer Architecture (ISCA). IEEE (2021)","DOI":"10.1109\/ISCA52012.2021.00034"},{"issue":"12","key":"5_CR3","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1109\/2.546611","volume":"29","author":"SV Adve","year":"1996","unstructured":"Adve, S.V., Gharachorloo, K.: Shared memory consistency models: a tutorial. Computer 29(12), 66\u201376 (1996)","journal-title":"Computer"},{"issue":"8","key":"5_CR4","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1145\/3155284.3018756","volume":"52","author":"T Ben-Nun","year":"2017","unstructured":"Ben-Nun, T., et al.: Groute: an asynchronous multi-GPU programming model for irregular computations. ACM SIGPLAN Notices 52(8), 235\u2013248 (2017)","journal-title":"ACM SIGPLAN Notices"},{"key":"5_CR5","unstructured":"Bershad, B.N., Zekauskas, M.J.: Midway: shared memory parallel programming with entry consistency for distributed memory multiprocessors. Technical report (1991)"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"Chen, G., et al.: EffiSha: a software framework for enabling effficient preemptive scheduling of GPU. In: Proceedings of the 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 3\u201316 (2017)","DOI":"10.1145\/3018743.3018748"},{"key":"5_CR7","unstructured":"Droco, M., et al.: Global Memory and Threading (GMT). https:\/\/github.com\/pnnl\/gmt"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Firoz, J.S., Zalewski, M., Kanewala, T., Lumsdaine, A.: Synchronization-avoiding graph algorithms. In: 2018 IEEE 25th International Conference on High Performance Computing (HiPC), pp. 52\u201361. IEEE (2018)","DOI":"10.1109\/HiPC.2018.00015"},{"key":"5_CR9","unstructured":"Modelado Foundation: Open Community Runtime. https:\/\/xstackwiki.modelado.org\/Open_Community_Runtime"},{"issue":"8","key":"5_CR10","doi-asserted-by":"publisher","first-page":"798","DOI":"10.1109\/12.868026","volume":"49","author":"GR Gao","year":"2000","unstructured":"Gao, G.R., Sarkar, V.: Location consistency-a new memory model and cache consistency protocol. IEEE Trans. Comput. 49(8), 798\u2013813 (2000)","journal-title":"IEEE Trans. Comput."},{"key":"5_CR11","doi-asserted-by":"crossref","unstructured":"Hechtman, B.A., Sorin, D.J.: Exploring memory consistency for massively-threaded throughput-oriented processors. In: Proceedings of the 40th Annual International Symposium on Computer Architecture, pp. 201\u2013212 (2013)","DOI":"10.1145\/2485922.2485940"},{"key":"5_CR12","unstructured":"Jeon, M., et al.: Analysis of large-scale multi-tenant GPU clusters for DNN training workloads. In: 2019 USENIX Annual Technical Conference (USENIX ATC 19), pp. 947\u2013960 (2019)"},{"key":"5_CR13","doi-asserted-by":"crossref","unstructured":"Landwehr, J., et al.: Designing scalable distributed memory models: a case study. In: Proceedings of the Computing Frontiers Conference, CF 2017, pp. 174\u2013182. Association for Computing Machinery, New York (2017)","DOI":"10.1145\/3075564.3077425"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Lenoski, D., et al.: The directory-based cache coherence protocol for the DASH multiprocessor. In: Proceedings of the 17th Annual International Symposium on Computer Architecture, ISCA 1990, pp. 148\u2013159. ACM, New York (1990)","DOI":"10.1145\/325096.325132"},{"key":"5_CR15","doi-asserted-by":"crossref","unstructured":"Long, G., et al.: Location consistency model revisited: problem, solution and prospects. In: 2008 Ninth International Conference on Parallel and Distributed Computing, Applications and Technologies, pp. 91\u201398 (2008)","DOI":"10.1109\/PDCAT.2008.31"},{"key":"5_CR16","doi-asserted-by":"crossref","unstructured":"Lustig, D., et al.: A formal analysis of the NVIDIA PTX memory consistency model. In: Proceedings of the Twenty-Fourth International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2019, pp. 257\u2013270. Association for Computing Machinery, New York (2019)","DOI":"10.1145\/3297858.3304043"},{"key":"5_CR17","doi-asserted-by":"crossref","unstructured":"Luszczek, P.R., et al.: The HPC Challenge (HPCC) benchmark suite. In: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, SC 2006, p. 213-es. Association for Computing Machinery, New York (2006)","DOI":"10.1145\/1188455.1188677"},{"key":"5_CR18","unstructured":"Protiae, J., Milutinoviae, V.: Entry consistency versus lazy release consistency in DSM systems: analytical comparison and a new hybrid solution. In: Proceedings of the Sixth IEEE Computer Society Workshop on Future Trends of Distributed Computing Systems, 1997, pp. 78\u201383, October 1997"},{"issue":"2","key":"5_CR19","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1109\/LCA.2019.2933842","volume":"18","author":"K Ranganath","year":"2019","unstructured":"Ranganath, K., et al.: Speeding up collective communications through inter-GPU re-routing. IEEE Comput. Archit. Lett. 18(2), 128\u2013131 (2019)","journal-title":"IEEE Comput. Archit. Lett."},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Ranganath, K., et al.: MAPA: multi-accelerator pattern allocation policy for multi-tenant GPU servers. In: SC21: International Conference for High Performance Computing, Networking, Storage and Analysis. ACM (2021)","DOI":"10.1145\/3458817.3480853"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"Ren, X., Lis, M.: Efficient sequential consistency in GPUs via relativistic cache coherence. In: 2017 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 625\u2013636. IEEE (2017)","DOI":"10.1109\/HPCA.2017.40"},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Ren, X., Lustig, D., Bolotin, E., Jaleel, A., Villa, O., Nellans, D.: HMG: extending cache coherence protocols across modern hierarchical multi-GPU systems. In: 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 582\u2013595. IEEE (2020)","DOI":"10.1109\/HPCA47549.2020.00054"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Singh, A., Aga, S., Narayanasamy, S.: Efficiently enforcing strong memory ordering in GPUs. In: Proceedings of the 48th International Symposium on Microarchitecture, pp. 699\u2013712 (2015)","DOI":"10.1145\/2830772.2830778"},{"key":"5_CR24","unstructured":"Rennich, S.: Streams and Concurrency. https:\/\/developer.download.nvidia.com\/CUDA\/training\/StreamsAndConcurrencyWebinar.pdf"},{"key":"5_CR25","unstructured":"Suetterlein, J., et al.: The Abstract Runtime System: ARTS. https:\/\/github.com\/pnnl\/ARTS"},{"key":"5_CR26","doi-asserted-by":"crossref","unstructured":"Tripathy, D., et al.: LocalityGuru: a PTX analyzer for extracting thread block-level locality in GPGPUs. In: Proceedings of the 15th IEEE\/ACM International Conference on Networking, Architecture, and Storage (2021, To appear)","DOI":"10.1109\/NAS51552.2021.9605411"},{"key":"5_CR27","unstructured":"Trott, C.R., Edwards, H.C.: Kokkos: the C++ performance portability programming model. Technical report, Sandia National Lab. (SNL-NM), Albuquerque, NM, United States (2017)"},{"key":"5_CR28","doi-asserted-by":"crossref","unstructured":"Ueno, K., Suzumura, T.: Highly scalable graph search for the Graph500 benchmark. In: Proceedings of the 21st International Symposium on High-Performance Parallel and Distributed Computing, HPDC 2012, pp. 149\u2013160. Association for Computing Machinery, New York (2012)","DOI":"10.1145\/2287076.2287104"},{"key":"5_CR29","unstructured":"Vergara, M., et al.: Scaling the summit: deploying the world\u2019s fastest supercomputer. In: International Workshop on OpenPOWER for HPC (IWOPH 2019) (2019)"},{"key":"5_CR30","doi-asserted-by":"publisher","unstructured":"Willcock, J.J., et al.: AM++: a generalized active message framework. In: Proceedings of the 19th International Conference on Parallel Architectures and Compilation Techniques, PACT 2010, pp. 401\u2013410. Association for Computing Machinery, New York (2010). https:\/\/doi.org\/10.1145\/1854273.1854323","DOI":"10.1145\/1854273.1854323"},{"key":"5_CR31","unstructured":"Xiao, W., et al.: Gandiva: introspective cluster scheduling for deep learning. In: 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 2018), pp. 595\u2013610 (2018)"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-99372-6_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,3,23]],"date-time":"2022-03-23T18:04:44Z","timestamp":1648058684000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-99372-6_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783030993719","9783030993726"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-99372-6_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"24 March 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"LCPC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Languages and Compilers for Parallel Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Newark, DE","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"34","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"lcpc2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/lcpc2021.github.io\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"11","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"82% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}