{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T18:26:50Z","timestamp":1767810410405,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,1,16]],"date-time":"2023-01-16T00:00:00Z","timestamp":1673827200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100020950","name":"National Science and Technology Council","doi-asserted-by":"publisher","award":["NSTC 111-2223-E-007-004-MY3, NSTC 111-2628-E-007-0"],"award-info":[{"award-number":["NSTC 111-2223-E-007-004-MY3, NSTC 111-2628-E-007-0"]}],"id":[{"id":"10.13039\/501100020950","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,1,16]]},"DOI":"10.1145\/3566097.3567838","type":"proceedings-article","created":{"date-parts":[[2023,1,31]],"date-time":"2023-01-31T18:40:49Z","timestamp":1675190449000},"page":"314-319","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["COLAB"],"prefix":"10.1145","author":[{"given":"Bo-Wun","family":"Cheng","sequence":"first","affiliation":[{"name":"National Tsing Hua University, Hsinchu, Taiwan"}]},{"given":"En-Ming","family":"Huang","sequence":"additional","affiliation":[{"name":"National Tsing Hua University, Hsinchu, Taiwan"}]},{"given":"Chen-Hao","family":"Chao","sequence":"additional","affiliation":[{"name":"National Tsing Hua University, Hsinchu, Taiwan"}]},{"given":"Wei-Fang","family":"Sun","sequence":"additional","affiliation":[{"name":"National Tsing Hua University, Hsinchu, Taiwan"}]},{"given":"Tsung-Tai","family":"Yeh","sequence":"additional","affiliation":[{"name":"National Yang Ming Chiao Tung University, Hsinchu, Taiwan"}]},{"given":"Chun-Yi","family":"Lee","sequence":"additional","affiliation":[{"name":"National Tsing Hua University, Hsinchu, Taiwan"}]}],"member":"320","published-online":{"date-parts":[[2023,1,31]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.11"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the 24th Workshop on Synthesis And System Integration of Mixed Information technologies (SASIMI '22)","author":"Cheng Bo-Wun","year":"2022","unstructured":"Bo-Wun Cheng, En-Ming Haung, Chen-Hao Chao, Wei-Fang Sun, Tsung-Tai Yeh, and Chun-Yi Lee. 2022. Remote Access Tag Array for Efficient GPU Intra-Cluster Data Sharing. In Proceedings of the 24th Workshop on Synthesis And System Integration of Mixed Information technologies (SASIMI '22). 221--222."},{"key":"e_1_3_2_1_4_1","volume-title":"Understanding and Optimizing GPU Cache Memory Performance for Compute Workloads. In 2014 IEEE 13th International Symposium on Parallel and Distributed Computing. 189--196","author":"Choo Kyoshin","year":"2014","unstructured":"Kyoshin Choo, William Panlener, and Byunghyun Jang. 2014. Understanding and Optimizing GPU Cache Memory Performance for Compute Workloads. In 2014 IEEE 13th International Symposium on Parallel and Distributed Computing. 189--196."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3001589"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Scott Grauer-Gray Lifan Xu Robert Searles Sudhee Ayalasomayajula and John Cavazos. 2012. Auto-tuning a high-level language targeted to GPU codes. In 2012 Innovative Parallel Computing (InPar). 1--10.","DOI":"10.1109\/InPar.2012.6339595"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414623"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00047"},{"key":"e_1_3_2_1_9_1","volume-title":"Analyzing and Leveraging Remote-Core Bandwidth for Enhanced Performance in GPUs. In 2019 28th International Conference on Parallel Architectures and Compilation Techniques (PACT). 258--271","author":"Ibrahim Mohamed Assem","year":"2019","unstructured":"Mohamed Assem Ibrahim, Hongyuan Liu, Onur Kayiran, and Adwait Jog. 2019. Analyzing and Leveraging Remote-Core Bandwidth for Enhanced Performance in GPUs. In 2019 28th International Conference on Parallel Architectures and Compilation Techniques (PACT). 258--271."},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the Eighteenth International Conference on Architectural Support for Programming Languages and Operating Systems","author":"Jog Adwait","unstructured":"Adwait Jog, Onur Kayiran, Nachiappan Chidambaram Nachiappan, Asit K. Mishra, Mahmut T. Kandemir, Onur Mutlu, Ravishankar Iyer, and Chita R. Das. 2013. OWL: Cooperative Thread Array Aware Scheduling Techniques for Improving GPGPU Performance. In Proceedings of the Eighteenth International Conference on Architectural Support for Programming Languages and Operating Systems (Houston, Texas, USA) (ASPLOS '13). Association for Computing Machinery, New York, NY, USA, 395--406."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3300053.3319418"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques. 157--166","author":"Kayiran Onur","unstructured":"Onur Kayiran, Adwait Jog, Mahmut T. Kandemir, and Chita R. Das. 2013. Neither more nor less: Optimizing thread-level parallelism for GPGPUs. In Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques. 157--166."},{"key":"e_1_3_2_1_13_1","volume-title":"Accel-Sim: An Extensible Simulation Framework for Validated GPU Modeling. In 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA). 473--486","author":"Khairy Mahmoud","unstructured":"Mahmoud Khairy, Zhesheng Shen, Tor M. Aamodt, and Timothy G. Rogers. 2020. Accel-Sim: An Extensible Simulation Framework for Validated GPU Modeling. In 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA). 473--486."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080239"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485964"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751237"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.33"},{"key":"e_1_3_2_1_18_1","volume-title":"Retrieved","author":"NVIDIA.","year":"2020","unstructured":"NVIDIA. 2020. NVIDIA AMPERE GA102 GPU ARCHITECTURE. Retrieved July 27, 2022 from https:\/\/www.nvidia.com\/content\/PDF\/nvidia-ampere-ga-102-gpu-architecture-whitepaper-v2.pdf"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322222"},{"key":"e_1_3_2_1_20_1","volume-title":"Cache-Conscious Wavefront Scheduling. In 2012 45th Annual IEEE\/ACM International Symposium on Microarchitecture. 72--83","author":"Rogers Timothy G.","unstructured":"Timothy G. Rogers, Mike O'Connor, and Tor M. Aamodt. 2012. Cache-Conscious Wavefront Scheduling. In 2012 45th Annual IEEE\/ACM International Symposium on Microarchitecture. 72--83."},{"key":"e_1_3_2_1_21_1","volume-title":"Divergence-Aware Warp Scheduling. In 2013 46th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 99--110","author":"Rogers Timothy G.","unstructured":"Timothy G. Rogers, Mike O'Connor, and Tor M. Aamodt. 2013. Divergence-Aware Warp Scheduling. In 2013 46th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 99--110."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.54"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287624.3287633"}],"event":{"name":"ASPDAC '23: 28th Asia and South Pacific Design Automation Conference","location":"Tokyo Japan","acronym":"ASPDAC '23","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CEDA","IEICE","IEEE CAS","IPSJ"]},"container-title":["Proceedings of the 28th Asia and South Pacific Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3566097.3567838","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3566097.3567838","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T17:32:55Z","timestamp":1767807175000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3566097.3567838"}},"subtitle":["Collaborative and Efficient Processing of Replicated Cache Requests in GPU"],"short-title":[],"issued":{"date-parts":[[2023,1,16]]},"references-count":23,"alternative-id":["10.1145\/3566097.3567838","10.1145\/3566097"],"URL":"https:\/\/doi.org\/10.1145\/3566097.3567838","relation":{},"subject":[],"published":{"date-parts":[[2023,1,16]]},"assertion":[{"value":"2023-01-31","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}