{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T21:07:34Z","timestamp":1761253654199,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,2,27]],"date-time":"2023-02-27T00:00:00Z","timestamp":1677456000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Shenzhen Science and Technology Program","award":["RCBS20210609104356063,JCYJ20210324120011032"],"award-info":[{"award-number":["RCBS20210609104356063,JCYJ20210324120011032"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["92270201,62006151,62076161,62106213,72150002"],"award-info":[{"award-number":["92270201,62006151,62076161,62106213,72150002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,2,27]]},"DOI":"10.1145\/3539597.3570470","type":"proceedings-article","created":{"date-parts":[[2023,2,22]],"date-time":"2023-02-22T23:27:00Z","timestamp":1677108420000},"page":"985-993","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Differentially Private Temporal Difference Learning with Stochastic Nonconvex-Strongly-Concave Optimization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1080-9412","authenticated-orcid":false,"given":"Canzhe","family":"Zhao","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8422-8923","authenticated-orcid":false,"given":"Yanjie","family":"Ze","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8579-306X","authenticated-orcid":false,"given":"Jing","family":"Dong","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2997-0970","authenticated-orcid":false,"given":"Baoxiang","family":"Wang","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3935-0708","authenticated-orcid":false,"given":"Shuai","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,2,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2976749.2978318"},{"key":"e_1_3_2_1_2_1","volume-title":"International Conference on Machine Learning. PMLR, 2130--2138","author":"Balle Borja","year":"2016","unstructured":"Borja Balle, Maziar Gomrokchi, and Doina Precup. 2016. Differentially private policy evaluation. In International Conference on Machine Learning. PMLR, 2130--2138."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"e_1_3_2_1_4_1","volume-title":"Conference on learning theory. PMLR, 1691--1692","author":"Bhandari Jalaj","year":"2018","unstructured":"Jalaj Bhandari, Daniel Russo, and Raghav Singal. 2018. A finite time analysis of temporal difference learning with linear function approximation. In Conference on learning theory. PMLR, 1691--1692."},{"key":"e_1_3_2_1_5_1","unstructured":"Digvijay Boob and Crist\u00f3bal Guzm\u00e1n. 2021. Optimal Algorithms for Differentially Private Stochastic Monotone Variational Inequalities and Saddle-Point Problems. (2021). arXiv:2104.02988"},{"key":"e_1_3_2_1_6_1","volume-title":"arXiv:1606.01540","author":"Brockman Greg","year":"2016","unstructured":"Greg Brockman, Vicki Cheung, Ludwig Pettersson, Jonas Schneider, John Schulman, Jie Tang, and Wojciech Zaremba. 2016. OpenAI Gym. (2016). arXiv:1606.01540"},{"key":"e_1_3_2_1_7_1","volume-title":"4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2--4, 2016, Conference Track Proceedings.","author":"Clevert Djork-Arn\u00e9","year":"2016","unstructured":"Djork-Arn\u00e9 Clevert, Thomas Unterthiner, and Sepp Hochreiter. 2016. Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs). In 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2--4, 2016, Conference Track Proceedings."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12079"},{"volume-title":"International Colloquium on Automata, Languages, and Programming","author":"Dwork Cynthia","key":"e_1_3_2_1_9_1","unstructured":"Cynthia Dwork. 2006. Differential privacy. In International Colloquium on Automata, Languages, and Programming. Springer, 1--12."},{"key":"e_1_3_2_1_10_1","first-page":"3","article-title":"The algorithmic foundations of differential privacy","volume":"9","author":"Dwork Cynthia","year":"2014","unstructured":"Cynthia Dwork, Aaron Roth, et al. 2014. The algorithmic foundations of differential privacy. Found. Trends Theor. Comput. Sci. 9, 3--4 (2014), 211--407.","journal-title":"Found. Trends Theor. Comput. Sci."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886799"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1137\/120880811"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1137\/18M1230542"},{"key":"e_1_3_2_1_14_1","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning. PMLR, 1861--1870."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 34th International Conference on Machine Learning, ICML 2017 (Proceedings of Machine Learning Research","volume":"1732","author":"Jin Chi","unstructured":"Chi Jin, Rong Ge, Praneeth Netrapalli, Sham M. Kakade, and Michael I. Jordan. 2017. How to Escape Saddle Points Efficiently. In Proceedings of the 34th International Conference on Machine Learning, ICML 2017 (Proceedings of Machine Learning Research, Vol. 70). PMLR, 1724--1732."},{"key":"e_1_3_2_1_16_1","volume-title":"Conference On Learning Theory, COLT 2018","author":"Jin Chi","year":"2018","unstructured":"Chi Jin, Praneeth Netrapalli, and Michael I. Jordan. 2018. Accelerated Gradient Descent Escapes Saddle Points Faster than Gradient Descent. In Conference On Learning Theory, COLT 2018, Stockholm, Sweden, 6--9 July 2018 (Proceedings of Machine Learning Research, Vol. 75). PMLR, 1042--1085."},{"key":"e_1_3_2_1_17_1","unstructured":"Hongwei Jin and Xun Chen. 2022. Gromov-Wasserstein Discrepancy with Local Differential Privacy for Distributed Structural Graphs. (2022). arXiv:2202.00808"},{"volume-title":"On the sample complexity of reinforcement learning","author":"Kakade Sham Machandranath","key":"e_1_3_2_1_18_1","unstructured":"Sham Machandranath Kakade. 2003. On the sample complexity of reinforcement learning. University of London, University College London (United Kingdom)."},{"key":"e_1_3_2_1_19_1","volume-title":"Actor critic with differentially private critic. arXiv preprint arXiv:1910.05876","author":"Lebensold Jonathan","year":"2019","unstructured":"Jonathan Lebensold, William Hamilton, Borja Balle, and Doina Precup. 2019. Actor critic with differentially private critic. arXiv preprint arXiv:1910.05876 (2019)."},{"key":"e_1_3_2_1_20_1","unstructured":"Sergey Levine Aviral Kumar George Tucker and Justin Fu. 2020. Offline Reinforcement Learning: Tutorial Review and Perspectives on Open Problems. (2020). arXiv:2005.01643"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13--18","volume":"6093","author":"Lin Tianyi","year":"2020","unstructured":"Tianyi Lin, Chi Jin, and Michael I. Jordan. 2020. On Gradient Descent Ascent for Nonconvex-Concave Minimax Problems. In Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13--18 July 2020, Virtual Event (Proceedings of Machine Learning Research, Vol. 119). PMLR, 6083--6093."},{"key":"e_1_3_2_1_22_1","volume-title":"Stochastic Recursive Gradient Descent Ascent for Stochastic Nonconvex-Strongly-Concave Minimax Problems. Advances in Neural Information Processing Systems 33","author":"Luo Luo","year":"2020","unstructured":"Luo Luo, Haishan Ye, Zhichao Huang, and Tong Zhang. 2020. Stochastic Recursive Gradient Descent Ascent for Stochastic Nonconvex-Strongly-Concave Minimax Problems. Advances in Neural Information Processing Systems 33 (2020)."},{"key":"e_1_3_2_1_23_1","unstructured":"Hamid Reza Maei Csaba Szepesvari Shalabh Bhatnagar Doina Precup David Silver and Richard S Sutton. 2009. Convergent temporal-difference learning with arbitrary smooth function approximation.. In NIPS. 1204--1212."},{"key":"e_1_3_2_1_24_1","volume-title":"Sutton","author":"Maei Hamid Reza","year":"2009","unstructured":"Hamid Reza Maei, Csaba Szepesv\u00e1ri, Shalabh Bhatnagar, Doina Precup, David Silver, and Richard S. Sutton. 2009. Convergent Temporal-Difference Learning with Arbitrary Smooth Function Approximation. In Advances in Neural Information Processing Systems 22: 23rd Annual Conference on Neural Information Processing Systems 2009. 1204--1212."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CSF.2017.11"},{"key":"e_1_3_2_1_26_1","volume-title":"Riedmiller","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin A. Riedmiller. 2013. Playing Atari with Deep Reinforcement Learning. arXiv preprint arXiv:1312.5602 (2013)."},{"volume-title":"Introductory Lectures on Convex Optimization - A Basic Course. Applied Optimization","author":"Nesterov Yurii E.","key":"e_1_3_2_1_27_1","unstructured":"Yurii E. Nesterov. 2004. Introductory Lectures on Convex Optimization - A Basic Course. Applied Optimization, Vol. 87. Springer."},{"key":"e_1_3_2_1_28_1","volume-title":"AAMAS '19","author":"Pan Xinlei","year":"2019","unstructured":"Xinlei Pan, Weiyao Wang, Xiaoshuai Zhang, Bo Li, Jinfeng Yi, and Dawn Song. 2019. How You Act Tells a Lot: Privacy-Leaking Attack on Deep Reinforcement Learning. In AAMAS '19 2019. International Foundation for Autonomous Agents and Multiagent Systems, 368--376."},{"key":"e_1_3_2_1_29_1","volume-title":"Single-Timescale Stochastic Nonconvex-Concave Optimization for Smooth Non- linear TD Learning. arXiv preprint arXiv:2008.10103","author":"Qiu Shuang","year":"2020","unstructured":"Shuang Qiu, Zhuoran Yang, Xiaohan Wei, Jieping Ye, and Zhaoran Wang. 2020. Single-Timescale Stochastic Nonconvex-Concave Optimization for Smooth Non- linear TD Learning. arXiv preprint arXiv:2008.10103 (2020)."},{"key":"e_1_3_2_1_30_1","volume-title":"Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020","author":"Roy Abhishek","year":"2020","unstructured":"Abhishek Roy, Krishnakumar Balasubramanian, Saeed Ghadimi, and Prasant Mohapatra. 2020. Escaping Saddle-Point Faster under Interpolation-like Conditions. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6--12, 2020, virtual."},{"key":"e_1_3_2_1_31_1","volume-title":"International conference on machine learning. PMLR, 387--395","author":"Silver David","year":"2014","unstructured":"David Silver, Guy Lever, Nicolas Heess, Thomas Degris, Daan Wierstra, and Martin Riedmiller. 2014. Deterministic policy gradient algorithms. In International conference on machine learning. PMLR, 387--395."},{"key":"e_1_3_2_1_32_1","volume-title":"Adaptive temporal difference learning with linear function approximation. arXiv preprint arXiv:2002.08537","author":"Sun Tao","year":"2020","unstructured":"Tao Sun, Han Shen, Tianyi Chen, and Dongsheng Li. 2020. Adaptive temporal difference learning with linear function approximation. arXiv preprint arXiv:2002.08537 (2020)."},{"key":"e_1_3_2_1_33_1","volume-title":"Learning to predict by the methods of temporal differences. Machine learning 3, 1","author":"Sutton Richard S","year":"1988","unstructured":"Richard S Sutton. 1988. Learning to predict by the methods of temporal differences. Machine learning 3, 1 (1988), 9--44."},{"volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard S","key":"e_1_3_2_1_34_1","unstructured":"Richard S Sutton and Andrew G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Richard S Sutton Csaba Szepesv\u00e1ri and Hamid Reza Maei. 2008. A convergent o (n) temporal-difference algorithm for off-policy learning with linear function approximation. In NIPS.","DOI":"10.1145\/1553374.1553501"},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence, February 12--17, 2016","author":"Aristide C.","year":"2087","unstructured":"Aristide C. Y. Tossou and Christos Dimitrakakis. 2016. Algorithms for Differentially Private Multi-Armed Bandits. In Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence, February 12--17, 2016, Phoenix, Arizona, USA. AAAI Press, 2087--2093."},{"key":"e_1_3_2_1_38_1","volume-title":"International Conference on Machine Learning. PMLR, 9754--9764","author":"Vietri Giuseppe","year":"2020","unstructured":"Giuseppe Vietri, Borja Balle, Akshay Krishnamurthy, and Steven Wu. 2020. Private reinforcement learning with pac and regret guarantees. In International Conference on Machine Learning. PMLR, 9754--9764."},{"key":"e_1_3_2_1_39_1","volume-title":"Variance Reduced Policy Evaluation with Smooth Function Approximation. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019","author":"Wai Hoi-To","year":"2019","unstructured":"Hoi-To Wai, Mingyi Hong, Zhuoran Yang, Zhaoran Wang, and Kexin Tang. 2019. Variance Reduced Policy Evaluation with Smooth Function Approximation. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8--14, 2019, Vancouver, BC, Canada. 5776--5787."},{"key":"e_1_3_2_1_40_1","volume-title":"Privacy-Preserving Q-Learning with Functional Noise in Continuous Spaces. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019","author":"Wang Baoxiang","year":"2019","unstructured":"Baoxiang Wang and Nidhi Hegde. 2019. Privacy-Preserving Q-Learning with Functional Noise in Continuous Spaces. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8--14, 2019, Vancouver, BC, Canada. 11323--11333."},{"key":"e_1_3_2_1_41_1","volume-title":"International Conference on Machine Learning. PMLR, 6526--6535","author":"Wang Di","year":"2019","unstructured":"Di Wang, Changyou Chen, and Jinhui Xu. 2019. Differentially private empirical risk minimization with non-convex loss functions. In International Conference on Machine Learning. PMLR, 6526--6535."},{"key":"e_1_3_2_1_42_1","volume-title":"Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems","author":"Wang Di","year":"2017","unstructured":"Di Wang, Minwei Ye, and Jinhui Xu. 2017. Differentially Private Empirical Risk Minimization Revisited: Faster and More General. In Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4--9, 2017, Long Beach, CA, USA. 2722--2731."},{"key":"e_1_3_2_1_43_1","unstructured":"Lingxiao Wang Bargav Jayaraman David Evans and Quanquan Gu. 2019. Efficient privacy-preserving nonconvex optimization. (2019) arXiv--1910."},{"key":"e_1_3_2_1_44_1","unstructured":"Yue Wang Shaofeng Zou and Yi Zhou. 2021. Finite-Sample Analysis for Two Time-scale Non-linear TDC with General Smooth Function Approximation. arXiv:2104.02836"},{"key":"e_1_3_2_1_45_1","volume-title":"Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020","author":"Wu Yue","year":"2020","unstructured":"Yue Wu, Weitong Zhang, Pan Xu, and Quanquan Gu. 2020. A Finite-Time Analysis of Two Time-Scale Actor-Critic Methods. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6--12, 2020, virtual."},{"key":"e_1_3_2_1_46_1","unstructured":"Zhenhuan Yang Shu Hu Yunwen Lei Kush R. Varshney Siwei Lyu and Yiming Ying. 2022. Differentially Private SGDA for Minimax Problems. (2022). arXiv:2201.09046"},{"key":"e_1_3_2_1_47_1","volume-title":"International Conference on Machine Learning. PMLR, 11204--11213","author":"Zhang Shangtong","year":"2020","unstructured":"Shangtong Zhang, Bo Liu, Hengshuai Yao, and Shimon Whiteson. 2020. Provably convergent two-timescale off-policy actor-critic with function approximation. In International Conference on Machine Learning. PMLR, 11204--11213."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489048.3522648"}],"event":{"name":"WSDM '23: The Sixteenth ACM International Conference on Web Search and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Singapore Singapore","acronym":"WSDM '23"},"container-title":["Proceedings of the Sixteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539597.3570470","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539597.3570470","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:15Z","timestamp":1750186935000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539597.3570470"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,27]]},"references-count":48,"alternative-id":["10.1145\/3539597.3570470","10.1145\/3539597"],"URL":"https:\/\/doi.org\/10.1145\/3539597.3570470","relation":{},"subject":[],"published":{"date-parts":[[2023,2,27]]},"assertion":[{"value":"2023-02-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}