{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T15:30:46Z","timestamp":1773588646173,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,22]]},"DOI":"10.1145\/3779212.3790247","type":"proceedings-article","created":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T13:55:26Z","timestamp":1773150926000},"page":"2212-2231","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["<scp>Wave<\/scp>\n                    : Leveraging Architecture Observation for Privacy-Preserving Model Oversight"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-0252-3376","authenticated-orcid":false,"given":"Haoxuan","family":"Xu","sequence":"first","affiliation":[{"name":"University of Southern California, Los Angeles, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9573-3041","authenticated-orcid":false,"given":"Chen","family":"Gong","sequence":"additional","affiliation":[{"name":"University of Southern California, Los Angeles, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7218-8844","authenticated-orcid":false,"given":"Beijie","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Southern California, Los Angeles, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0478-4028","authenticated-orcid":false,"given":"Haizhong","family":"Zheng","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, 
USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9166-6476","authenticated-orcid":false,"given":"Beidi","family":"Chen","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2721-4021","authenticated-orcid":false,"given":"Mengyuan","family":"Li","sequence":"additional","affiliation":[{"name":"University of Southern California, Los Angeles, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,3,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3338466.3358916"},{"key":"e_1_3_2_1_2_1","unstructured":"AMD ROCm Team. 2025. ROCm ROCProfiler. https:\/\/rocm.docs.amd.com\/projects\/rocprofiler\/en\/latest\/ Accessed: 2025-08-21."},{"key":"e_1_3_2_1_3_1","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang Xiaodong Deng Yang Fan Wenbin Ge Yu Han Fei Huang et al. 2023. Qwen technical report. arXiv preprint arXiv:2309.16609 (2023)."},{"key":"e_1_3_2_1_4_1","unstructured":"Bing-Jyue Chen Lilia Tang and Daniel Kang. 2025. ZKTorch: Compiling ML Inference to Zero-Knowledge Proofs via Parallel Proof Accumulation. arXiv:2507.07031 [cs.CR] https:\/\/arxiv.org\/abs\/2507.07031"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3650088"},{"key":"e_1_3_2_1_6_1","unstructured":"NVIDIA Corporation. 2025a. CUDA Profiling Tools Interface (CUPTI). https:\/\/developer.nvidia.com\/cupti Accessed: 2025-08-21."},{"key":"e_1_3_2_1_7_1","unstructured":"NVIDIA Corporation. 2025b. NVIDIA Nsight Compute. https:\/\/developer.nvidia.com\/nsight-compute Accessed: 2025-08-21."},{"key":"e_1_3_2_1_8_1","unstructured":"NVIDIA Corporation. 2025c. NVIDIA Nsight Systems. 
https:\/\/developer.nvidia.com\/nsight-systems Accessed: 2025-08-21."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2021.3073626"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2022.3145814"},{"key":"e_1_3_2_1_11_1","first-page":"1","article-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"Fedus William","year":"2022","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity. Journal of Machine Learning Research, Vol. 23, 120 (2022), 1-39.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3361525.3361541"},{"key":"e_1_3_2_1_13_1","volume-title":"Scalable zero-knowledge proofs for non-linear functions in machine learning (SEC '24)","author":"Hao Meng","unstructured":"Meng Hao, Hanxiao Chen, Hongwei Li, Chenkai Weng, Yuan Zhang, Haomiao Yang, and Tianwei Zhang. 2024. Scalable zero-knowledge proofs for non-linear functions in machine learning (SEC '24). USENIX Association, USA, Article 214, 18 pages."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378460"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2209.03125"},{"key":"e_1_3_2_1_16_1","unstructured":"Daniel Kang Tatsunori Hashimoto Ion Stoica and Yi Sun. 2022. Scaling up Trustless DNN Inference with Zero-Knowledge Proofs. arXiv:2210.08674 [cs.CR] https:\/\/arxiv.org\/abs\/2210.08674"},{"key":"e_1_3_2_1_17_1","unstructured":"John Kirchenbauer Jonas Geiping Yuxin Wen Jonathan Katz Ian Miers and Tom Goldstein. 2024a. A Watermark for Large Language Models. arXiv:2301.10226 [cs.LG] https:\/\/arxiv.org\/abs\/2301.10226"},{"key":"e_1_3_2_1_18_1","volume-title":"On the Reliability of Watermarks for Large Language Models. 
In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=DEJIDCmWOz","author":"Kirchenbauer John","year":"2024","unstructured":"John Kirchenbauer, Jonas Geiping, Yuxin Wen, Manli Shu, Khalid Saifullah, Kezhi Kong, Kasun Fernando, Aniruddha Saha, Micah Goldblum, and Tom Goldstein. 2024b. On the Reliability of Watermarks for Large Language Models. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=DEJIDCmWOz"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3486001.3486224"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","unstructured":"Ben Laurie Adam Langley and Emilia Kasper. 2013. Certificate Transparency. RFC 6962. https:\/\/doi.org\/10.17487\/RFC6962","DOI":"10.17487\/RFC6962"},{"key":"e_1_3_2_1_22_1","volume-title":"Data-driven power modeling and monitoring via hardware performance counter tracking. Journal of Systems Architecture","author":"Mazzola Sergio","year":"2025","unstructured":"Sergio Mazzola, Gabriele Ara, Thomas Benz, Bj\u00f6rn Forsberg, Tommaso Cucinotta, and Luca Benini. 2025. Data-driven power modeling and monitoring via hardware performance counter tracking. Journal of Systems Architecture (2025), 103504."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3243734.3243831"},{"key":"e_1_3_2_1_24_1","unstructured":"NVIDIA. 2023. Confidential Compute on NVIDIA Hopper H100. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/HCC-Whitepaper-v1.0.pdf."},{"key":"e_1_3_2_1_25_1","unstructured":"OpenAI Forum. 2024a. GPT4-Turbo more ''stupid\/lazy'' - it's not a GPT4. OpenAI Community Forum post. https:\/\/community.openai.com\/t\/gpt4-turbo-more-stupid-lazy-its-not-a-gpt4\/608008"},{"key":"e_1_3_2_1_26_1","unstructured":"OpenAI Forum. 2024b. OpenAI did made GPT3.5 more stupid? 
OpenAI Community Forum post. https:\/\/community.openai.com\/t\/openai-did-made-gpt3-5-more-stupid\/262979"},{"key":"e_1_3_2_1_27_1","volume-title":"Knockoff Nets: Stealing Functionality of Black-Box Models. arXiv:1812.02766 [cs.CV] https:\/\/arxiv.org\/abs\/1812.02766","author":"Orekondy Tribhuvanesh","year":"2018","unstructured":"Tribhuvanesh Orekondy, Bernt Schiele, and Mario Fritz. 2018. Knockoff Nets: Stealing Functionality of Black-Box Models. arXiv:1812.02766 [cs.CV] https:\/\/arxiv.org\/abs\/1812.02766"},{"key":"e_1_3_2_1_28_1","volume-title":"Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation","author":"Press Ofir","unstructured":"Ofir Press, Noah A. Smith, and Mike Lewis. 2022. Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation. arXiv:2108.12409 [cs.CL] https:\/\/arxiv.org\/abs\/2108.12409"},{"key":"e_1_3_2_1_29_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever et al. 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_30_1","unstructured":"Alex Saltanov. 2024. OpenAI Keeps Dumbing Down ChatGPT. AI Mind (Medium publication) article. https:\/\/pub.aimind.so\/openai-keeps-dumbing-down-chatgpt-6a6e4a173237"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629578"},{"key":"e_1_3_2_1_32_1","unstructured":"Guoheng Sun Ziyao Wang Bowei Tian Meng Liu Zheyu Shen Shwai He Yexiao He Wanghao Ye Yiting Wang and Ang Li. 2025b. CoIn: Counting the Invisible Reasoning Tokens in Commercial Opaque LLM APIs. arXiv:2505.13778 [cs.AI] https:\/\/arxiv.org\/abs\/2505.13778"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658644.3670334"},{"key":"e_1_3_2_1_34_1","volume-title":"SVIP: Towards Verifiable Inference of Open-source Large Language Models. arXiv:2410.22307 [cs.LG] https:\/\/arxiv.org\/abs\/2410.22307","author":"Sun Yifan","year":"2025","unstructured":"Yifan Sun, Yuhang Li, Yue Zhang, Yuchen Jin, and Huan Zhang. 2025a. 
SVIP: Towards Verifiable Inference of Open-source Large Language Models. arXiv:2410.22307 [cs.LG] https:\/\/arxiv.org\/abs\/2410.22307"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-93387-0_34"},{"key":"e_1_3_2_1_36_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al., 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_37_1","unstructured":"Florian Tram\u00e8r Fan Zhang Ari Juels Michael K. Reiter and Thomas Ristenpart. 2016. Stealing Machine Learning Models via Prediction APIs. arXiv:1609.02943 [cs.CR] https:\/\/arxiv.org\/abs\/1609.02943"},{"key":"e_1_3_2_1_38_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358307"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2015.7095789"},{"key":"e_1_3_2_1_41_1","volume-title":"Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al., 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems, Vol. 
35 (2022), 24824-24837."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Yuchen Xia Jiho Kim Yuhan Chen Haojie Ye Souvik Kundu Cong Hao and Nishil Talati. 2024. Understanding the Performance and Estimating the Cost of LLM Fine-Tuning. arXiv:2408.04693 [cs.CL] https:\/\/arxiv.org\/abs\/2408.04693","DOI":"10.1109\/IISWC63097.2024.00027"},{"key":"e_1_3_2_1_43_1","first-page":"521","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Yu Gyeong-In","year":"2022","unstructured":"Gyeong-In Yu, Joo Seong Jeong, Geon-Woo Kim, Soojeong Kim, and Byung-Gon Chun. 2022. Orca: A distributed serving system for generative models. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). 521-538."},{"key":"e_1_3_2_1_44_1","unstructured":"Tianchen Zhang Gururaj Saileshwar and David Lie. 2024. Time Will Tell: Timing Side Channels via Output Token Count in Large Language Models. arXiv:2412.15431 [cs.LG] https:\/\/arxiv.org\/abs\/2412.15431"},{"key":"e_1_3_2_1_45_1","volume-title":"Provable Robust Watermarking for AI-Generated Text. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SsmT8aO45L","author":"Zhao Xuandong","year":"2024","unstructured":"Xuandong Zhao, Prabhanjan Vijendra Ananth, Lei Li, and Yu-Xiang Wang. 2024. Provable Robust Watermarking for AI-Generated Text. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SsmT8aO45L"},{"key":"e_1_3_2_1_46_1","volume-title":"Sglang: Efficient execution of structured language model programs. Advances in neural information processing systems","author":"Zheng Lianmin","year":"2024","unstructured":"Lianmin Zheng, Liangsheng Yin, Zhiqiang Xie, Chuyue Livia Sun, Jeff Huang, Cody Hao Yu, Shiyi Cao, Christos Kozyrakis, Ion Stoica, Joseph E Gonzalez, et al., 2024. Sglang: Efficient execution of structured language model programs. 
Advances in neural information processing systems, Vol. 37 (2024), 62557-62583."}],"event":{"name":"ASPLOS '26: 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Pittsburgh PA USA","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"deposited":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T14:00:38Z","timestamp":1773583238000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3779212.3790247"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,22]]},"references-count":46,"alternative-id":["10.1145\/3779212.3790247","10.1145\/3779212"],"URL":"https:\/\/doi.org\/10.1145\/3779212.3790247","relation":{},"subject":[],"published":{"date-parts":[[2026,3,22]]},"assertion":[{"value":"2026-03-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}