{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:00:18Z","timestamp":1750309218394,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1145\/3676536.3676661","type":"proceedings-article","created":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T12:53:56Z","timestamp":1744203236000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["PrivQuant: Communication-Efficient Private Inference with Quantized Network\/Protocol Co-Optimization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-7832-828X","authenticated-orcid":false,"given":"Tianshi","family":"Xu","sequence":"first","affiliation":[{"name":"Peking University, School of Integrated Circuits, Beijing, China"},{"name":"Peking University, Institute for Artificial Intelligence, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5478-3604","authenticated-orcid":false,"given":"Shuzhang","family":"Zhong","sequence":"additional","affiliation":[{"name":"Peking University, Institute for Artificial Intelligence, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3743-0947","authenticated-orcid":false,"given":"Wenxuan","family":"Zeng","sequence":"additional","affiliation":[{"name":"Peking University, School of Software and Microelectronics, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7514-0767","authenticated-orcid":false,"given":"Runsheng","family":"Wang","sequence":"additional","affiliation":[{"name":"Peking University, School of Integrated Circuit, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7212-2264","authenticated-orcid":false,"given":"Meng","family":"Li","sequence":"additional","affiliation":[{"name":"Peking University, Institute for Artificial Intelligence, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,4,9]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"1501","volume-title":"28th USENIX Security Symposium (USENIX Security 19)","author":"Riazi M Sadegh","year":"2019","unstructured":"M Sadegh Riazi, Mohammad Samragh, Hao Chen, Kim Laine, Kristin Lauter, and Farinaz Koushanfar. {XONN}:{XNOR-based} oblivious deep neural network inference. In 28th USENIX Security Symposium (USENIX Security 19), pages 1501--1518, 2019."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460120.3484797"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372297.3417274"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP40001.2021.00086"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2017.12"},{"key":"e_1_3_2_1_6_1","first-page":"201","volume-title":"International conference on machine learning","author":"Gilad-Bachrach Ran","year":"2016","unstructured":"Ran Gilad-Bachrach, Nathan Dowlin, Kim Laine, Kristin Lauter, Michael Naehrig, and John Wernsing. Cryptonets: Applying neural networks to encrypted data with high throughput and accuracy. In International conference on machine learning, pages 201--210. PMLR, 2016."},{"key":"e_1_3_2_1_7_1","volume-title":"Quantizing deep convolutional networks for efficient inference: A whitepaper. arXiv preprint arXiv:1806.08342","author":"Krishnamoorthi Raghuraman","year":"2018","unstructured":"Raghuraman Krishnamoorthi. Quantizing deep convolutional networks for efficient inference: A whitepaper. arXiv preprint arXiv:1806.08342, 2018."},{"key":"e_1_3_2_1_8_1","first-page":"78906","article-title":"Network\/protocol co-optimization for communication-efficient private inference","volume":"36","author":"Zeng Wenxuan","year":"2023","unstructured":"Wenxuan Zeng, Meng Li, Haichuan Yang, Wen-jie Lu, Runsheng Wang, and Ru Huang. Copriv: Network\/protocol co-optimization for communication-efficient private inference. Advances in Neural Information Processing Systems, 36:78906--78925, 2023.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133956.3134056"},{"key":"e_1_3_2_1_10_1","volume-title":"Secure multi-party computation","author":"Goldreich Oded","year":"1998","unstructured":"Oded Goldreich. Secure multi-party computation. 1998."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2017.12"},{"key":"e_1_3_2_1_12_1","volume-title":"Jan","author":"Juvekar Chiraag","year":"2018","unstructured":"Chiraag Juvekar, Vinod Vaikuntanathan, and AnanthaP. Chandrakasan. GAZELLE: A low latency framework for secure neural network inference, Jan 2018."},{"key":"e_1_3_2_1_13_1","first-page":"809","volume-title":"31st USENIX Security Symposium (USENIX Security 22)","author":"Huang Zhicong","year":"2022","unstructured":"Zhicong Huang, Wen-jie Lu, Cheng Hong, and Jiansheng Ding. Cheetah: Lean and fast secure {Two-Party} deep neural network inference. In 31st USENIX Security Symposium (USENIX Security 22), pages 809--826, 2022."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD57390.2023.10323672"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3243734.3243837"},{"key":"e_1_3_2_1_16_1","volume-title":"Advances in Neural Information Processing Systems","author":"Hao Meng","year":"2022","unstructured":"Meng Hao, Hongwei Li, Hanxiao Chen, Pengzhi Xing, Guowen Xu, and Tianwei Zhang. Iron: Private inference on transformers. In Advances in Neural Information Processing Systems, 2022."},{"key":"e_1_3_2_1_17_1","volume-title":"Jan","author":"Mishra Pratyush","year":"2020","unstructured":"Pratyush Mishra, Ryan Lehmkuhl, Akshayaram Srinivasan, Wenting Zheng, and RalucaAda Popa. Delphi: A cryptographic inference service for neural networks, Jan 2020."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3407023.3407045"},{"key":"e_1_3_2_1_19_1","volume-title":"Mpcformer: fast, performant and private transformer inference with mpc. arXiv preprint arXiv:2211.01452","author":"Li Dacheng","year":"2022","unstructured":"Dacheng Li, Rulin Shao, Hongyi Wang, Han Guo, Eric P Xing, and Hao Zhang. Mpcformer: fast, performant and private transformer inference with mpc. arXiv preprint arXiv:2211.01452, 2022."},{"key":"e_1_3_2_1_20_1","first-page":"16961","article-title":"Private inference on a relu budget","volume":"33","author":"Ghodsi Zahra","year":"2020","unstructured":"Zahra Ghodsi, Akshaj Kumar Veldanda, Brandon Reagen, and Siddharth Garg. Cryptonas: Private inference on a relu budget. Advances in Neural Information Processing Systems, 33:16961--16971, 2020.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","volume-title":"International Conference on Learning Representations","author":"Lou Qian","year":"2020","unstructured":"Qian Lou, Yilin Shen, Hongxia Jin, and Lei Jiang. Safenet: A secure, accurate and fast neural network inference. In International Conference on Learning Representations, 2020."},{"key":"e_1_3_2_1_22_1","first-page":"4839","volume-title":"International Conference on Machine Learning","author":"Jha Nandan Kumar","year":"2021","unstructured":"Nandan Kumar Jha, Zahra Ghodsi, Siddharth Garg, and Brandon Reagen. Deepreduce: Relu reduction for fast private inference. In International Conference on Machine Learning, pages 4839--4849. PMLR, 2021."},{"key":"e_1_3_2_1_23_1","volume-title":"Sphynx: Relu-efficient network design for private inference. arXiv preprint arXiv:2106.11755","author":"Cho Minsu","year":"2021","unstructured":"Minsu Cho, Zahra Ghodsi, Brandon Reagen, Siddharth Garg, and Chinmay Hegde. Sphynx: Relu-efficient network design for private inference. arXiv preprint arXiv:2106.11755, 2021."},{"key":"e_1_3_2_1_24_1","first-page":"3947","volume-title":"International Conference on Machine Learning","author":"Cho Minsu","year":"2022","unstructured":"Minsu Cho, Ameya Joshi, Brandon Reagen, Siddharth Garg, and Chinmay Hegde. Selective network linearization for efficient private inference. In International Conference on Machine Learning, pages 3947--3961. PMLR, 2022."},{"key":"e_1_3_2_1_25_1","volume-title":"Mpcvit: Searching for mpc-friendly vision transformer with heterogeneous attention. arXiv preprint arXiv:2211.13955","author":"Zeng Wenxuan","year":"2022","unstructured":"Wenxuan Zeng, Meng Li, Wenjie Xiong, Wenjie Lu, Jin Tan, Runsheng Wang, and Ru Huang. Mpcvit: Searching for mpc-friendly vision transformer with heterogeneous attention. arXiv preprint arXiv:2211.13955, 2022."},{"key":"e_1_3_2_1_26_1","volume-title":"Hequant: Marrying homomorphic encryption and quantization for communication-efficient private inference","author":"Xu Tianshi","year":"2024","unstructured":"Tianshi Xu, Meng Li, and Runsheng Wang. Hequant: Marrying homomorphic encryption and quantization for communication-efficient private inference, 2024."},{"key":"e_1_3_2_1_27_1","volume-title":"Apr","author":"Lindell Yehuda","year":"2009","unstructured":"Yehuda Lindell and Benny Pinkas. A proof of security of yao's protocol for two-party computation. Journal of Cryptology, page 161--188, Apr 2009."},{"key":"e_1_3_2_1_28_1","volume-title":"IACR Cryptology ePrint Archive","author":"Rabin O.","year":"1981","unstructured":"MichaelO. Rabin. How to exchange secrets with oblivious transfer. IACR Cryptology ePrint Archive, IACR Cryptology ePrint Archive, Jan 1981."},{"key":"e_1_3_2_1_29_1","volume-title":"Mixed precision quantization of convnets via differentiable neural architecture search. ArXiv, abs\/1812.00090","author":"Wu Bichen","year":"2018","unstructured":"Bichen Wu, Yanghan Wang, Peizhao Zhang, Yuandong Tian, P\u00e9ter Vajda, and Kurt Keutzer. Mixed precision quantization of convnets via differentiable neural architecture search. ArXiv, abs\/1812.00090, 2018."},{"key":"e_1_3_2_1_30_1","volume-title":"Fracbits: Mixed precision quantization via fractional bit-widths. arXiv preprint arXiv:2007.02017","author":"Yang Linjie","year":"2020","unstructured":"Linjie Yang and Qing Jin. Fracbits: Mixed precision quantization via fractional bit-widths. arXiv preprint arXiv:2007.02017, 2020."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00524"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00038"},{"key":"e_1_3_2_1_33_1","volume-title":"Hawq-v2: Hessian aware trace-weighted quantization of neural networks. Advances in neural information processing systems, 33:18518--18529","author":"Dong Zhen","year":"2020","unstructured":"Zhen Dong, Zhewei Yao, Daiyaan Arfeen, Amir Gholami, Michael W Mahoney, and Kurt Keutzer. Hawq-v2: Hessian aware trace-weighted quantization of neural networks. Advances in neural information processing systems, 33:18518--18529, 2020."},{"key":"e_1_3_2_1_34_1","first-page":"11875","volume-title":"International Conference on Machine Learning","author":"Yao Zhewei","year":"2021","unstructured":"Zhewei Yao, Zhen Dong, Zhangcheng Zheng, Amir Gholami, Jiali Yu, Eric Tan, Leyuan Wang, Qijing Huang, Yida Wang, Michael Mahoney, et al. Hawq-v3: Dyadic neural network quantization. In International Conference on Machine Learning, pages 11875--11886. PMLR, 2021."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00065"},{"key":"e_1_3_2_1_37_1","volume-title":"Autoaugment: Learning augmentation policies from data. arXiv preprint arXiv:1805.09501","author":"Cubuk Ekin D","year":"2018","unstructured":"Ekin D Cubuk, Barret Zoph, Dandelion Mane, Vijay Vasudevan, and Quoc V Le. Autoaugment: Learning augmentation policies from data. arXiv preprint arXiv:1805.09501, 2018."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00612"},{"key":"e_1_3_2_1_39_1","volume-title":"mixup: Beyond empirical risk minimization. arXiv preprint arXiv:1710.09412","author":"Zhang Hongyi","year":"2017","unstructured":"Hongyi Zhang, Moustapha Cisse, Yann N Dauphin, and David Lopez-Paz. mixup: Beyond empirical risk minimization. arXiv preprint arXiv:1710.09412, 2017."},{"key":"e_1_3_2_1_40_1","volume-title":"Programmable, efficient, and scalable secure two-party computation for machine learning. Cryptology ePrint Archive","author":"Chandran Nishanth","year":"2017","unstructured":"Nishanth Chandran, Divya Gupta, Aseem Rastogi, Rahul Sharma, and Shardul Tripathi. Ezpc: Programmable, efficient, and scalable secure two-party computation for machine learning. Cryptology ePrint Archive, 2017."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP40000.2020.00092"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP46214.2022.9833697"},{"key":"e_1_3_2_1_43_1","volume-title":"Gala: Greedy computation for linearalgebra in privacy-preserved neural networks. arXiv preprint arXiv:2105.01827","author":"Zhang Qiao","year":"2021","unstructured":"Qiao Zhang, Chunsheng Xin, and Hongyi Wu. Gala: Greedy computation for linearalgebra in privacy-preserved neural networks. arXiv preprint arXiv:2105.01827, 2021."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3196494.3196522"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.56553\/popets-2022-0109"},{"key":"e_1_3_2_1_46_1","volume-title":"A survey on model compression for large language models. arXiv preprint arXiv:2308.07633","author":"Zhu Xunyu","year":"2023","unstructured":"Xunyu Zhu, Jian Li, Yong Liu, Can Ma, and Weiping Wang. A survey on model compression for large language models. arXiv preprint arXiv:2308.07633, 2023."}],"event":{"name":"ICCAD '24: 43rd IEEE\/ACM International Conference on Computer-Aided Design","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CAS","IEEE CEDA","IEEE EDS"],"location":"Newark Liberty International Airport Marriott New York NY USA","acronym":"ICCAD '24"},"container-title":["Proceedings of the 43rd IEEE\/ACM International Conference on Computer-Aided Design"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676536.3676661","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676536.3676661","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:43:57Z","timestamp":1750290237000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676536.3676661"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":46,"alternative-id":["10.1145\/3676536.3676661","10.1145\/3676536"],"URL":"https:\/\/doi.org\/10.1145\/3676536.3676661","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]},"assertion":[{"value":"2025-04-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}