{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T23:34:09Z","timestamp":1771976049643,"version":"3.50.1"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,10]],"date-time":"2021-05-10T00:00:00Z","timestamp":1620604800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,5,10]],"date-time":"2021-05-10T00:00:00Z","timestamp":1620604800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,10]],"date-time":"2021-05-10T00:00:00Z","timestamp":1620604800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,10]]},"DOI":"10.1109\/infocom42981.2021.9488736","type":"proceedings-article","created":{"date-parts":[[2021,7,27]],"date-time":"2021-07-27T00:07:32Z","timestamp":1627344452000},"page":"1-10","source":"Crossref","is-referenced-by-count":73,"title":["DRL-OR: Deep Reinforcement Learning-based Online Routing for Multi-type Service Requirements"],"prefix":"10.1109","author":[{"given":"Chenyi","family":"Liu","sequence":"first","affiliation":[]},{"given":"Mingwei","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Yuan","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Nan","family":"Geng","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"14070","article-title":"Reinforcement learning with convex constraints","author":"miryoosefi","year":"2019","journal-title":"Proceedings of NIPS"},{"key":"ref38","first-page":"671","article-title":"Packet routing in dynamically changing networks: A reinforcement learning approach","author":"boyan","year":"1994","journal-title":"Proceedings of NIPS"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3098822.3098843"},{"key":"ref32","first-page":"3050","article-title":"A deep reinforcement learning perspective on internet congestion control","author":"jay","year":"2019","journal-title":"Proceedings of ICML"},{"key":"ref31","article-title":"Qos routing for supporting resource reservation","author":"wang","year":"1996","journal-title":"IEEE JSAC"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/1111322.1111341"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.2016.1600317WC"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3229607.3229610"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2017.2709742"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/LCN.2018.8638099"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2534169.2486011"},{"key":"ref40","first-page":"22","article-title":"Constrained policy optimization","author":"achiam","year":"2017","journal-title":"Proceedings of ICML"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2656877.2656890"},{"key":"ref12","first-page":"945","article-title":"Predictive q-routing: A memory-based reinforcement learning approach to adaptive traffic control","author":"choi","year":"1996","journal-title":"Proceedings of NIPS"},{"key":"ref13","first-page":"796","article-title":"Qelar: A machine-learning-based adaptive routing protocol for energy-efficient and lifetime-extended underwater sensor networks","volume":"9","author":"hu","year":"2010","journal-title":"IEEE TMC"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.adhoc.2018.08.003"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2018.8485853"},{"key":"ref16","first-page":"265","article-title":"Evaluating and boosting reinforcement learningfor intra-domain routing","author":"xu","year":"2019","journal-title":"MASS"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3152434.3152441"},{"key":"ref18","article-title":"Towards safe online reinforcement learning in computer systems","author":"mao","year":"2019"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1868447.1868466"},{"key":"ref28","article-title":"Pytorch implementations of reinforcement learning algorithms","author":"kostrikov","year":"2018"},{"key":"ref4","first-page":"80","article-title":"Distributed quality-of-service routing in high-speed networks based on selective probing","author":"chen","year":"1998","journal-title":"Proc in LCN"},{"key":"ref27","article-title":"Ryu","year":"2020"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2749760"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOM.2001.916274"},{"key":"ref29","article-title":"Abilene","year":"2004"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOM.1999.751451"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2012.2187305"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3341216.3342213"},{"key":"ref2","first-page":"11","article-title":"Supporting differentiated service classes: queue scheduling disciplines","author":"semeria","year":"2001","journal-title":"Juniper Networks"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1109\/JPROC.2014.2371999","article-title":"Software-defined networking: A comprehensive survey","volume":"103","author":"kreutz","year":"2014","journal-title":"Proceedings of the IEEE"},{"key":"ref1","article-title":"Rfc2475: An architecture for differentiated service","author":"blake","year":"1998"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/2534169.2486020"},{"key":"ref22","first-page":"5867","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","author":"zhang","year":"2018","journal-title":"Proceedings of ICML"},{"key":"ref21","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.bjp.2013.11.004"},{"key":"ref23","article-title":"Highdimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2017.2731419"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2017.2657643"}],"event":{"name":"IEEE INFOCOM 2021 - IEEE Conference on Computer Communications","location":"Vancouver, BC, Canada","start":{"date-parts":[[2021,5,10]]},"end":{"date-parts":[[2021,5,13]]}},"container-title":["IEEE INFOCOM 2021 - IEEE Conference on Computer Communications"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9488422\/9488423\/09488736.pdf?arnumber=9488736","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:43:41Z","timestamp":1652197421000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9488736\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,10]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/infocom42981.2021.9488736","relation":{},"subject":[],"published":{"date-parts":[[2021,5,10]]}}}