{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T04:07:15Z","timestamp":1747886835735,"version":"3.41.0"},"reference-count":34,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100000266","name":"Commonwealth Scholarship Commission and the U.K. Engineering and Physical Sciences Research Council","doi-asserted-by":"publisher","award":["EP\/X012301\/1","EP\/X04047X\/1","EP\/X04047X\/2","EP\/Y037243\/1"],"award-info":[{"award-number":["EP\/X012301\/1","EP\/X04047X\/1","EP\/X04047X\/2","EP\/Y037243\/1"]}],"id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3569093","type":"journal-article","created":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T17:45:14Z","timestamp":1747071914000},"page":"85217-85230","source":"Crossref","is-referenced-by-count":0,"title":["Design Principles for Reinforcement Learning in Congestion Control Environments"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5749-2371","authenticated-orcid":false,"given":"Lincoln","family":"Kamau Kiarie","sequence":"first","affiliation":[{"name":"Wolfson School of Mechanical, Electrical and Manufacturing Engineering, Loughborough University, Loughborough, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6997-045X","authenticated-orcid":false,"given":"Mahsa","family":"Derakhshani","sequence":"additional","affiliation":[{"name":"Wolfson School of Mechanical, Electrical and Manufacturing Engineering, Loughborough University, Loughborough, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7498-4589","authenticated-orcid":false,"given":"Konstantinos G.","family":"Kyriakopoulos","sequence":"additional","affiliation":[{"name":"Wolfson School of Mechanical, Electrical and Manufacturing Engineering, Loughborough University, Loughborough, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"volume-title":"Computer Networking: A Top-Down Approach","year":"2021","author":"Kurose","key":"ref1"},{"key":"ref2","article-title":"Internet congestion control benchmarking","author":"Abbasloo","year":"2023","journal-title":"arXiv:2307.10054"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604838"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1400097.1400105"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3232755.3232783"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/1851275.1851192"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2904994"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3012426.3022184"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/2740070.2626324"},{"key":"ref10","article-title":"Iroko: A framework to prototype reinforcement learning for data center traffic control","author":"Ruffy","year":"2018","journal-title":"arXiv:1812.09975"},{"key":"ref11","first-page":"3050","article-title":"A deep reinforcement learning perspective on Internet congestion control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jay"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3387514.3405892"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155250"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM42981.2021.9488851"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3544216.3544243"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM53939.2023.10228952"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2020.3036958"},{"key":"ref18","article-title":"Gymnasium: A standard interface for reinforcement learning environments","author":"Towers","year":"2024","journal-title":"arXiv:2407.17032"},{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref19"},{"key":"ref20","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCN49398.2020.9209750"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM52122.2024.10621288"},{"key":"ref23","first-page":"731","article-title":"Pantheon: The training ground for Internet congestion-control research","volume-title":"Proc. USENIX Annu. Tech. Conf. (ATC)","author":"Yan"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ColComCon.2014.6860404"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS.2008.44"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2019.106872"},{"key":"ref28","article-title":"OpenAI gym","author":"Brockman","year":"2016","journal-title":"arXiv:1606.01540"},{"issue":"19","key":"ref29","first-page":"1","article-title":"Breaking the curse of dimensionality with convex neural networks","volume":"18","author":"Bach","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref30","article-title":"MVFST-RL: An asynchronous RL framework for congestion control with delayed actions","author":"Sivakumar","year":"2019","journal-title":"arXiv:1910.04054"},{"key":"ref31","first-page":"2490","article-title":"Park: An open platform for learning-augmented computer systems","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Mao"},{"key":"ref32","first-page":"9460","article-title":"Defining and characterizing reward gaming","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Skalse"},{"key":"ref33","first-page":"1","article-title":"A quantitative measure of fairness and discrimination","volume":"21","author":"Jain","year":"1984"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-019-09421-1"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/11000309.pdf?arnumber=11000309","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T05:06:48Z","timestamp":1747804008000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11000309\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3569093","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2025]]}}}