{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T16:11:40Z","timestamp":1774627900024,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T00:00:00Z","timestamp":1741564800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["WK2150110034"],"award-info":[{"award-number":["WK2150110034"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Anhui Provincial Natural Science Foundation","award":["2308085QF229"],"award-info":[{"award-number":["2308085QF229"]}]},{"name":"Joint Research Project of the Science and Technology Innovation Community in the Yangtze River Delta","award":["2023CSJZN0200"],"award-info":[{"award-number":["2023CSJZN0200"]}]},{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62406303"],"award-info":[{"award-number":["62406303"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Anhui Science and Technology Innovation Plan","award":["202423k09020010"],"award-info":[{"award-number":["202423k09020010"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,10]]},"DOI":"10.1145\/3701551.3703585","type":"proceedings-article","created":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T12:33:36Z","timestamp":1740573216000},"page":"963-972","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["ProCC: Programmatic Reinforcement Learning for Efficient and Transparent TCP Congestion Control"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6734-9085","authenticated-orcid":false,"given":"Yin","family":"Gu","sequence":"first","affiliation":[{"name":"State Key Lab of Cognitive Intelligence, University of Science and Technology of China, Hefei, Anhui, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5335-2470","authenticated-orcid":false,"given":"Kai","family":"Zhang","sequence":"additional","affiliation":[{"name":"State Key Lab of Cognitive Intelligence, University of Science and Technology of China, Hefei, Anhui, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6956-5550","authenticated-orcid":false,"given":"Qi","family":"Liu","sequence":"additional","affiliation":[{"name":"State Key Lab of Cognitive Intelligence, University of Science and Technology of China, Hefei, Anhui, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4080-2377","authenticated-orcid":false,"given":"Runlong","family":"Yu","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Pittsburgh, Pittsburgh, Pennsylvania, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6913-4654","authenticated-orcid":false,"given":"Xin","family":"Lin","sequence":"additional","affiliation":[{"name":"State Key Lab of Cognitive Intelligence, University of Science and Technology of China, Hefei, Anhui, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4147-8094","authenticated-orcid":false,"given":"Xinjie","family":"Sun","sequence":"additional","affiliation":[{"name":"State Key Lab of Cognitive Intelligence, University of Science and Technology of China, Hefei, Anhui, China"}]}],"member":"320","published-online":{"date-parts":[[2025,3,10]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3387514.3405892"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512276"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3232755.3232783"},{"key":"e_1_3_2_1_4_1","volume-title":"Verifiable reinforcement learning via policy extraction. Advances in neural information processing systems","author":"Bastani Osbert","year":"2018","unstructured":"Osbert Bastani, Yewen Pu, and Armando Solar-Lezama. 2018. Verifiable reinforcement learning via policy extraction. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/190314.190317"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3009824"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583341"},{"key":"e_1_3_2_1_8_1","volume-title":"International conference on computers and games. Springer, 72--83","author":"Coulom R\u00e9mi","year":"2006","unstructured":"R\u00e9mi Coulom. 2006. Efficient selectivity and backup operators in Monte-Carlo tree search. In International conference on computers and games. Springer, 72--83."},{"key":"e_1_3_2_1_9_1","volume-title":"12th USENIX Symposium on Networked Systems Design and Implementation (NSDI 15)","author":"Dong Mo","year":"2015","unstructured":"Mo Dong, Qingxi Li, Doron Zarchy, P Brighten Godfrey, and Michael Schapira. 2015. {PCC}: Re-architecting congestion control for consistent high performance. In 12th USENIX Symposium on Networked Systems Design and Implementation (NSDI 15). 395--408."},{"key":"e_1_3_2_1_10_1","volume-title":"15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18)","author":"Dong Mo","year":"2018","unstructured":"Mo Dong, Tong Meng, Doron Zarchy, Engin Arslan, Yossi Gilad, Brighten Godfrey, and Michael Schapira. 2018. {PCC} Vivace:{Online-Learning} Congestion Control. In 15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18). 343--356."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3452296.3472936"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNSE.2022.3185253"},{"key":"e_1_3_2_1_13_1","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"Ernst Damien","year":"2005","unstructured":"Damien Ernst, Pierre Geurts, and Louis Wehenkel. 2005. Tree-based batch mode reinforcement learning. Journal of Machine Learning Research, Vol. 6 (2005).","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Sally Floyd Tom Henderson and Andrei Gurtov. 2004. The NewReno modification to TCP's fast recovery algorithm. Technical Report.","DOI":"10.17487\/rfc3782"},{"key":"e_1_3_2_1_15_1","volume-title":"A survey on interpretable reinforcement learning. arXiv preprint arXiv:2112.13112","author":"Glanois Claire","year":"2021","unstructured":"Claire Glanois, Paul Weng, Matthieu Zimmer, Dong Li, Tianpei Yang, Jianye Hao, and Wulong Liu. 2021. A survey on interpretable reinforcement learning. arXiv preprint arXiv:2112.13112 (2021)."},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"38","author":"Gu Yin","year":"2024","unstructured":"Yin Gu, Kai Zhang, Qi Liu, Weibo Gao, Longfei Li, and Jun Zhou. 2024. \u03c0-light: Programmatic interpretable reinforcement learning for resource-limited traffic signal control. In Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 38. 21107--21115."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1400097.1400105"},{"key":"e_1_3_2_1_18_1","volume-title":"Congestion avoidance and control. ACM SIGCOMM computer communication review","author":"Jacobson Van","year":"1988","unstructured":"Van Jacobson. 1988. Congestion avoidance and control. ACM SIGCOMM computer communication review, Vol. 18, 4 (1988), 314--329."},{"key":"e_1_3_2_1_19_1","volume-title":"International Conference on Machine Learning. PMLR, 3050--3059","author":"Jay Nathan","year":"2019","unstructured":"Nathan Jay, Noga Rotman, Brighten Godfrey, Michael Schapira, and Aviv Tamar. 2019. A deep reinforcement learning perspective on internet congestion control. In International Conference on Machine Learning. PMLR, 3050--3059."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICC45855.2022.9838901"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.comcom.2020.07.018"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/11871842_29"},{"key":"e_1_3_2_1_23_1","volume-title":"International Conference on Machine Learning. PMLR, 5979--5989","author":"Landajuela Mikel","year":"2021","unstructured":"Mikel Landajuela, Brenden K Petersen, Sookyung Kim, Claudio P Santiago, Ruben Glatt, Nathan Mundhenk, Jacob F Pettit, and Daniel Faissol. 2021. Discovering symbolic policies with deep reinforcement learning. In International Conference on Machine Learning. PMLR, 5979--5989."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3387514.3405859"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski et al. 2015. Human-level control through deep reinforcement learning. nature Vol. 518 7540 (2015) 529--533.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_1_26_1","volume-title":"State of the art-a survey of partially observable Markov decision processes: theory, models, and algorithms. Management science","author":"Monahan George E","year":"1982","unstructured":"George E Monahan. 1982. State of the art-a survey of partially observable Markov decision processes: theory, models, and algorithms. Management science, Vol. 28, 1 (1982), 1--16."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Rohan Paleja Yaru Niu Andrew Silva Chace Ritchie Sugju Choi and Matthew Gombolay. 2022. Learning Interpretable High-Performing Policies for Autonomous Driving. In Robotics: Science and Systems (RSS).","DOI":"10.15607\/RSS.2022.XVIII.068"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3281411.3281430"},{"key":"e_1_3_2_1_29_1","volume-title":"The Tenth International Conference on Learning Representations.","author":"Qiu Wenjie","year":"2022","unstructured":"Wenjie Qiu and He Zhu. 2022. Programmatic Reinforcement Learning without Oracles. In The Tenth International Conference on Learning Representations."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCN.2010.5560080"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM42981.2021.9488851"},{"key":"e_1_3_2_1_32_1","volume-title":"Mohit Aggarwal, Brian Wang, Liying Han, Julian de Gortari Briseno, and Mani Srivastava.","author":"Saha Swapnil Sayan","year":"2023","unstructured":"Swapnil Sayan Saha, Sandeep Singh Sandha, Mohit Aggarwal, Brian Wang, Liying Han, Julian de Gortari Briseno, and Mani Srivastava. 2023. TinyNS: Platform-Aware Neurosymbolic Auto Tiny Machine Learning. ACM Transactions on Embedded Computing Systems (2023)."},{"key":"e_1_3_2_1_33_1","volume-title":"Nature","volume":"588","author":"Schrittwieser Julian","year":"2020","unstructured":"Julian Schrittwieser, Ioannis Antonoglou, Thomas Hubert, Karen Simonyan, Laurent Sifre, Simon Schmitt, Arthur Guez, Edward Lockhart, Demis Hassabis, Thore Graepel, et al. 2020. Mastering atari, go, chess and shogi by planning with a learned model. Nature, Vol. 588, 7839 (2020), 604--609."},{"key":"e_1_3_2_1_34_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_1_35_1","first-page":"10684","article-title":"Symbolic Distillation for Learned TCP Congestion Control","volume":"35","author":"Sharan SP","year":"2022","unstructured":"SP Sharan, Wenqing Zheng, Kuo-Feng Hsu, Jiarong Xing, Ang Chen, and Zhangyang Wang. 2022. Symbolic Distillation for Learned TCP Congestion Control. Advances in Neural Information Processing Systems, Vol. 35 (2022), 10684--10695.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"David Silver Julian Schrittwieser Karen Simonyan Ioannis Antonoglou Aja Huang Arthur Guez Thomas Hubert Lucas Baker Matthew Lai Adrian Bolton et al. 2017. Mastering the game of go without human knowledge. nature Vol. 550 7676 (2017) 354--359.","DOI":"10.1038\/nature24270"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i06.6587"},{"key":"e_1_3_2_1_38_1","volume-title":"Practical bayesian optimization of machine learning algorithms. Advances in neural information processing systems","author":"Snoek Jasper","year":"2012","unstructured":"Jasper Snoek, Hugo Larochelle, and Ryan P Adams. 2012. Practical bayesian optimization of machine learning algorithms. Advances in neural information processing systems, Vol. 25 (2012)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3512798.3512815"},{"key":"e_1_3_2_1_40_1","volume-title":"Learning to synthesize programs as interpretable and generalizable policies. Advances in neural information processing systems","author":"Trivedi Dweep","year":"2021","unstructured":"Dweep Trivedi, Jesse Zhang, Shao-Hua Sun, and Joseph J Lim. 2021. Learning to synthesize programs as interpretable and generalizable policies. Advances in neural information processing systems, Vol. 34 (2021), 25146--25163."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450061"},{"key":"e_1_3_2_1_42_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Verma Abhinav","year":"2019","unstructured":"Abhinav Verma, Hoang Le, Yisong Yue, and Swarat Chaudhuri. 2019. Imitation-projected programmatic reinforcement learning. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_43_1","volume-title":"International Conference on Machine Learning. PMLR, 5045--5054","author":"Verma Abhinav","year":"2018","unstructured":"Abhinav Verma, Vijayaraghavan Murali, Rishabh Singh, Pushmeet Kohli, and Swarat Chaudhuri. 2018. Programmatically interpretable reinforcement learning. In International Conference on Machine Learning. PMLR, 5045--5054."},{"key":"e_1_3_2_1_44_1","volume-title":"Piecewise Linear Parametrization of Policies: Towards Interpretable Deep Reinforcement Learning. In The Twelfth International Conference on Learning Representations.","author":"Wabartha Maxime","year":"2024","unstructured":"Maxime Wabartha and Joelle Pineau. 2024. Piecewise Linear Parametrization of Policies: Towards Interpretable Deep Reinforcement Learning. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2534169.2486020"},{"key":"e_1_3_2_1_46_1","volume-title":"10th USENIX Symposium on Networked Systems Design and Implementation (NSDI 13)","author":"Winstein Keith","year":"2013","unstructured":"Keith Winstein, Anirudh Sivaraman, and Hari Balakrishnan. 2013. Stochastic forecasts achieve high throughput and low delay over cellular networks. In 10th USENIX Symposium on Networked Systems Design and Implementation (NSDI 13). 459--471."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599240"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2024\/577"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544216.3544243"},{"key":"e_1_3_2_1_50_1","volume-title":"IEEE INFOCOM","volume":"4","author":"Xu Lisong","year":"2004","unstructured":"Lisong Xu, Khaled Harfoush, and Injong Rhee. 2004. Binary increase congestion control (BIC) for fast long-distance networks. In IEEE INFOCOM 2004, Vol. 4. IEEE, 2514--2524."},{"key":"e_1_3_2_1_51_1","volume-title":"2018 USENIX Annual Technical Conference (USENIX ATC 18)","author":"Yan Francis Y","year":"2018","unstructured":"Francis Y Yan, Jestin Ma, Greg D Hill, Deepti Raghavan, Riad S Wahby, Philip Levis, and Keith Winstein. 2018. Pantheon: the training ground for Internet congestion-control research. In 2018 USENIX Annual Technical Conference (USENIX ATC 18). 731--743."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604838"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/2785956.2787498"}],"event":{"name":"WSDM '25: The Eighteenth ACM International Conference on Web Search and Data Mining","location":"Hannover Germany","acronym":"WSDM '25","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the Eighteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701551.3703585","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701551.3703585","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T09:17:04Z","timestamp":1755767824000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701551.3703585"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,10]]},"references-count":53,"alternative-id":["10.1145\/3701551.3703585","10.1145\/3701551"],"URL":"https:\/\/doi.org\/10.1145\/3701551.3703585","relation":{},"subject":[],"published":{"date-parts":[[2025,3,10]]},"assertion":[{"value":"2025-03-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}