{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T17:23:44Z","timestamp":1763054624877,"version":"3.45.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,13]]},"DOI":"10.1145\/3764860.3768338","type":"proceedings-article","created":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T13:54:43Z","timestamp":1759326883000},"page":"84-92","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Are Your GPU Atomics Secretly Contending?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4752-475X","authenticated-orcid":false,"given":"Peter","family":"Maucher","sequence":"first","affiliation":[{"name":"Karlsruhe Institute of Technology, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7125-2976","authenticated-orcid":false,"given":"Nick","family":"Djerfi","sequence":"additional","affiliation":[{"name":"Karlsruhe Institute of Technology, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8104-6590","authenticated-orcid":false,"given":"Lennard","family":"Kittner","sequence":"additional","affiliation":[{"name":"Karlsruhe Institute of Technology, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9369-9567","authenticated-orcid":false,"given":"Lukas","family":"Werling","sequence":"additional","affiliation":[{"name":"Karlsruhe Institute of Technology, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0806-3191","authenticated-orcid":false,"given":"Frank","family":"Bellosa","sequence":"additional","affiliation":[{"name":"Karlsruhe Institute of Technology, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AMD. 2025. Heterogenious Interface for Portability. https:\/\/rocm.docs.amd.com\/projects\/HIP\/en\/latest\/index.html"},{"key":"e_1_3_2_1_2_1","unstructured":"AMD. 2019. RDNA 1 White Paper. https:\/\/web.archive.org\/web\/20190821193406\/https:\/\/www.amd.com\/system\/files\/documents\/rdna-whitepaper.pdf"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2485994"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3218508"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00056"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3725798.3725801"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2011.34"},{"key":"e_1_3_2_1_8_1","unstructured":"Khronos\u00ae Group. 2022. Khronos Vulkan Registry. https:\/\/registry.khronos.org\/vulkan\/"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00138-012-0443-3"},{"key":"e_1_3_2_1_10_1","volume-title":"Patterson","author":"Hennessy John L.","year":"2019","unstructured":"John L. Hennessy and David A. Patterson. 2019. Computer Architecture: A Quantitative Approach. Elsevier."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00101"},{"key":"e_1_3_2_1_12_1","volume-title":"Scarpazza","author":"Jia Zhe","year":"2018","unstructured":"Zhe Jia, Marco Maggioni, Benjamin Staiger, and Daniele P. Scarpazza. 2018. Dissecting the NVIDIA Volta GPU Architecture via Microbenchmarking. arXiv:1804.06826 [cs.DC] https:\/\/arxiv.org\/abs\/1804.06826"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3547276.3548627"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2012.219"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1816021"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378471"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1364\/ECBO.2009.7373_13"},{"key":"e_1_3_2_1_18_1","volume-title":"Tagungsband Des FG-BS Fr\u00fchjahrstreffens 2024","author":"Maucher Peter","year":"2024","unstructured":"Peter Maucher, Lennard Kittner, Nico Rath, Gregor Lucka, Lukas Werling, Yussuf Khalil, Thorsten Gr\u00f6ninger, and Frank Bellosa. 2024. Full-Scale File System Acceleration on GPU. In Tagungsband Des FG-BS Fr\u00fchjahrstreffens 2024 (2024). Gesellschaft f\u00fcr Informatik eV, 10--18420."},{"key":"e_1_3_2_1_19_1","unstructured":"Devon McKee Tylor Sorensen Ishita Chaturvedi Gurpreet Dhillon and Sean Siddens. 2024. GPU Atomic Performance Modeling with Microbenchmarks. (2024). https:\/\/vulkan.org\/user\/pages\/09.events\/vulkanised-2024\/vulkanised-2024-devon-mckee.pdf"},{"key":"e_1_3_2_1_20_1","unstructured":"Xinxin Mei and Xiaowen Chu. 2016. Dissecting GPU Memory Hierarchy through Microbenchmarking. arXiv:1509.02308 [cs.AR] https:\/\/arxiv.org\/abs\/1509.02308"},{"key":"e_1_3_2_1_21_1","unstructured":"NVIDIA. 2025. CUDA C++ Programming Guide. https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide"},{"key":"e_1_3_2_1_22_1","unstructured":"NVIDIA. 2025. CUDA Memory Model. https:\/\/nvidia.github.io\/cccl\/libcudacxx\/extended_api\/memory_model.html"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507758"},{"key":"e_1_3_2_1_24_1","unstructured":"David Patterson. 2009. The Top 10 Innovations in the New NVIDIA Fermi Architecture and the Top 3 Next Challenges. The Top 10 Innovations in the New NVIDIA Fermi Architecture and the Top 3 Next Challenges. https:\/\/www.nvidia.com.tw\/content\/PDF\/fermi_white_papers\/D.Patterson_Top10InnovationsInNVIDIAFermi.pdf"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575748"},{"key":"e_1_3_2_1_26_1","unstructured":"Tamal Saha Abhishek Rawat and Minh Le. [n. d.]. Fermi - A Complete GPU Compute Architecture by NVIDIA. ([n.d.]). https:\/\/www.cs.virginia.edu\/~skadron\/cs6354_f09_processors\/Fermi.pptx"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2490301.2451169"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2017.8167781"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2008.05.012"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2016.04.014"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2983990.2984032"},{"key":"e_1_3_2_1_32_1","volume-title":"Owens","author":"Stuart Jeff A.","year":"2011","unstructured":"Jeff A. Stuart and John D. Owens. 2011. Efficient Synchronization Primitives for Gpus. arXiv:1110.4623 [cs.OS] https:\/\/arxiv.org\/abs\/1110 4623"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPPW.2010.59"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2013.6657065"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2010.5452013"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018743.3018754"},{"key":"e_1_3_2_1_37_1","unstructured":"Huan Zhang Si Si and Cho-Jui Hsieh. 2017. GPU-acceleration for Large-Scale Tree Boosting. arXiv:1706.08359 [stat.ML] https:\/\/arxiv.org\/abs\/1706.08359"},{"key":"e_1_3_2_1_38_1","volume-title":"GPU-accelerated Text Mining. In Workshop on Exploiting Parallelism Using GPUs and Other Hardware-Assisted Methods","author":"Zhang Yongpeng","year":"2009","unstructured":"Yongpeng Zhang, Frank Mueller, Xiaohui Cui, and Thomas Potok. 2009. GPU-accelerated Text Mining. In Workshop on Exploiting Parallelism Using GPUs and Other Hardware-Assisted Methods (2009). ACM Press New York, 1--6."}],"event":{"name":"SOSP '25: ACM SIGOPS 31st Symposium on Operating Systems Principles","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"],"location":"Seoul Republic of Korea","acronym":"SOSP '25"},"container-title":["Proceedings of the 13th Workshop on Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3764860.3768338","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T17:20:32Z","timestamp":1763054432000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3764860.3768338"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,13]]},"references-count":38,"alternative-id":["10.1145\/3764860.3768338","10.1145\/3764860"],"URL":"https:\/\/doi.org\/10.1145\/3764860.3768338","relation":{},"subject":[],"published":{"date-parts":[[2025,10,13]]},"assertion":[{"value":"2025-10-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}