{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T10:11:25Z","timestamp":1781518285660,"version":"3.54.1"},"reference-count":198,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"MoE-CMCC Artificial Intelligence Project","award":["MCM20190701"],"award-info":[{"award-number":["MCM20190701"]}]},{"DOI":"10.13039\/501100002766","name":"BUPT Excellent Ph.D. Students Foundation","doi-asserted-by":"publisher","award":["CX2021112"],"award-info":[{"award-number":["CX2021112"]}],"id":[{"id":"10.13039\/501100002766","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Commun. Surv. Tutorials"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/comst.2021.3102580","type":"journal-article","created":{"date-parts":[[2021,8,5]],"date-time":"2021-08-05T20:08:18Z","timestamp":1628194098000},"page":"2064-2097","source":"Crossref","is-referenced-by-count":116,"title":["Leveraging Deep Reinforcement Learning for Traffic Engineering: A Survey"],"prefix":"10.1109","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6897-5531","authenticated-orcid":false,"given":"Yang","family":"Xiao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4007-6109","authenticated-orcid":false,"given":"Jun","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiawei","family":"Wu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8541-3565","authenticated-orcid":false,"given":"Nirwan","family":"Ansari","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1364\/JOCN.5.00A100"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2923702"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1117\/12.2306087"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2019.2947291"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1109\/TNSE.2020.2978856"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2016.1600492CM"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2716952"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2018.2815360"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2017.2750030"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2015.2431731"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1109\/TGCN.2017.2771724"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2016.7579021"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.17487\/rfc3784"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.17487\/rfc2328"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.17487\/rfc2453"},{"key":"ref30","first-page":"88","article-title":"Study and performance of interior gateway IP routing protocols","volume":"2","author":"sendra","year":"2010","journal-title":"Netw Protocols Algorithms"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2005.1413640"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2016.2614247"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2011.092311.00071"},{"key":"ref34","first-page":"33","author":"rutgers","year":"1991","journal-title":"An introduction to IGRP"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2017.2664665"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2017.123"},{"key":"ref185","author":"chen","year":"2019","journal-title":"Incremental reinforcement learning&#x2014;A new continuous reinforcement learning frame based on stochastic differential equation methods"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2903273"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2971172"},{"key":"ref182","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.3004555"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.7763\/IJMLC.2015.V5.489"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRev.36.823"},{"key":"ref187","year":"2021","journal-title":"OpenAI Gym"},{"key":"ref186","author":"hernandez-leal","year":"2017","journal-title":"A Survey of Learning in Multiagent Environments Dealing With Non-Stationarity"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.2988367"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2916583"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2805379"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/1355734.1355746"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2904897"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.2965856"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2926625"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2018.1800109"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/MVT.2019.2903655"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2790388"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TGCN.2021.3049500"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref154","first-page":"1","article-title":"Delay-based congestion control for multipath TCP","author":"cao","year":"2012","journal-title":"Proc 20th IEEE Int Conf Netw Protocols (ICNP)"},{"key":"ref153","first-page":"99","article-title":"Design, implementation and evaluation of congestion control for multipath TCP","volume":"11","author":"wischik","year":"2011","journal-title":"Proc USENIX Symp Netw Syst Design Implement (NSDI)"},{"key":"ref156","first-page":"2829","article-title":"Continuous deep Q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref155","author":"paasch","year":"2021","journal-title":"MultiPath TCP in the Linux Kernel"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-20757-0_35"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2014.2379698"},{"key":"ref151","article-title":"Coupled multipath-aware congestion control","author":"raiciu","year":"2010","journal-title":"IETF Internet-Draft"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1145\/1400097.1400105"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1002\/sat.799"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1145\/190314.190317"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1016\/j.peva.2007.12.007"},{"key":"ref59","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref58","author":"schaul","year":"2016","journal-title":"Prioritized experience replay"},{"key":"ref57","first-page":"2094","article-title":"Deep reinforcement learning with double Q-learning","volume":"30","author":"van hasselt","year":"2016","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.5626\/KTCP.2018.24.12.670"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.3004394"},{"key":"ref52","author":"lample","year":"2016","journal-title":"Playing fps games with deep reinforcement learning"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6144"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1109\/4235.996017"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2004.826067"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.011.1900303"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1145\/2740070.2626334"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2019.1800386"},{"key":"ref162","article-title":"Smart resource allocation for mobile edge computing: A deep reinforcement learning approach","author":"wang","year":"2019","journal-title":"IEEE Trans Emerg Topics Comput"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1109\/ISWCS.2018.8491089"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/JLT.2019.2923615"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.17487\/rfc3272"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TGCN.2018.2837618"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2008.060548"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2012.100412.00017"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2902846"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2008.080406"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1145\/3005745.3005750"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2014.2320099"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2881964"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-017-4917-1"},{"key":"ref45","first-page":"671","article-title":"Packet routing in dynamically changing networks: A reinforcement learning approach","volume":"6","author":"boyan","year":"1993","journal-title":"Advances in neural information processing systems"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TNSM.2020.3036911"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/SCC.2016.12"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-017-0468-y"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref44","volume":"37","author":"rummery","year":"1994","journal-title":"On-line Q-learning using connectionist systems"},{"key":"ref43","article-title":"Learning from delayed rewards","author":"watkins","year":"1989"},{"key":"ref73","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref72","author":"wu","year":"2017","journal-title":"Scalable trust-region method for deep reinforcement learning using Kronecker-factored approximation"},{"key":"ref71","author":"wang","year":"2016","journal-title":"Sample efficient actor-critic with experience replay"},{"key":"ref70","author":"schulman","year":"2017","journal-title":"Proximal policy optimization algorithms"},{"key":"ref76","author":"horgan","year":"2018","journal-title":"Distributed prioritized experience replay"},{"key":"ref77","first-page":"2613","article-title":"Double Q-learning","volume":"23","author":"van hasselt","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref74","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref75","first-page":"1407","article-title":"IMPALA: Scalable distributed deep-RL with importance weighted actor-learner architectures","author":"espeholt","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref78","first-page":"2892","article-title":"Distributional reinforcement learning with quantile regression","volume":"32","author":"dabney","year":"2018","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref79","first-page":"601","article-title":"A class of gradient-estimation algorithms for reinforcement learning in neural networks","author":"williams","year":"1987","journal-title":"Proc Int Conf Neural Netw"},{"key":"ref60","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","author":"wang","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref62","first-page":"449","article-title":"A distributional perspective on reinforcement learning","author":"bellemare","year":"2017","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref61","author":"fortunato","year":"2017","journal-title":"Noisy Networks for Exploration"},{"key":"ref63","first-page":"3215","article-title":"Rainbow: Combining improvements in deep reinforcement learning","volume":"32","author":"hessel","year":"2018","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref64","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref65","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"Proc 31st Int Conf Mach Learn"},{"key":"ref66","author":"lillicrap","year":"2015","journal-title":"Continuous control with deep reinforcement learning"},{"key":"ref67","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref68","author":"barth-maron","year":"2018","journal-title":"Distributed Distributional Deterministic Policy Gradients"},{"key":"ref69","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref197","author":"wang","year":"2019","journal-title":"Benchmarking model-based reinforcement learning"},{"key":"ref198","first-page":"1","article-title":"Reinforcement learning for multi-hop scheduling and routing of real-time flows","author":"hasanzadezonuzy","year":"2020","journal-title":"Proc Int Symp Model Optim Mobile Ad-Hoc Wireless Netw (WiOpt)"},{"key":"ref193","first-page":"4218","article-title":"Machine theory of mind","author":"rabinowitz","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"ref195","author":"ba?ar","year":"1998","journal-title":"Dynamic Noncooperative Game Theory"},{"key":"ref196","author":"janner","year":"2019","journal-title":"When to trust your model Model-based policy optimization"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/CompComm.2018.8780950"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref94","first-page":"14","article-title":"Deep reinforcement learning based QoS-aware routing in knowledge-defined networking","author":"pham","year":"2018","journal-title":"Proc Int Conf Heterogeneous Netw Qual Rel Security Robustness"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2018.8485853"},{"key":"ref191","author":"kim","year":"2019","journal-title":"Learning to Schedule Communication in Multi-agent Reinforcement Learning"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2018.1800097"},{"key":"ref192","first-page":"746","article-title":"The dynamics of reinforcement learning in cooperative multiagent systems","volume":"2","author":"claus","year":"1998","journal-title":"Proc 15th Conf Artif Intell Innovat Appl Artif Intell (AAAI\/IAAI)"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2877686"},{"key":"ref90","author":"stampa","year":"2017","journal-title":"A Deep-Reinforcement Learning Approach for Software-Defined Networking Routing Optimization"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOMW.2019.8845154"},{"key":"ref99","author":"mao","year":"2017","journal-title":"Accnet Actor-coordinator-critic net for &#x201C;learning-to-communicate&#x201D; with deep multi-agent reinforcement learning"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.23919\/WiOPT47501.2019.9144110"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2904539"},{"key":"ref82","first-page":"1008","article-title":"Actor-critic algorithms","author":"konda","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref81","author":"schulman","year":"2015","journal-title":"High-dimensional continuous control using generalized advantage estimation"},{"key":"ref84","article-title":"A natural policy gradient","author":"kakade","year":"2001","journal-title":"Advances in neural information processing systems"},{"key":"ref83","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"Proc 32nd Int Conf Mach Learn"},{"key":"ref80","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"99","author":"sutton","year":"1999","journal-title":"Proc Int Conf Adv Neural Inf Process Syst (NIPS)"},{"key":"ref89","author":"haarnoja","year":"2018","journal-title":"Soft actor-critic algorithms and applications"},{"key":"ref85","author":"munos","year":"2016","journal-title":"Safe and efficient off-policy reinforcement learning"},{"key":"ref86","first-page":"2408","article-title":"Optimizing neural networks with Kronecker-factored approximate curvature","author":"martens","year":"2015","journal-title":"Proc 32nd Int Conf Mach Learn"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1162\/089976698300017746"},{"key":"ref88","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","author":"haarnoja","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1002\/itl2.99"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230551"},{"key":"ref127","doi-asserted-by":"crossref","first-page":"27s","DOI":"10.1109\/MCOM.2005.1404595","article-title":"TCP in wireless environments: Problems and solutions","volume":"43","author":"ye","year":"2005","journal-title":"IEEE Commun Mag"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2904994"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1145\/2534169.2486031"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2017.04.018"},{"key":"ref129","first-page":"766","article-title":"Cellular network traffic scheduling with deep reinforcement learning","author":"chinchali","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2019.8761737"},{"key":"ref130","first-page":"3050","article-title":"A deep reinforcement learning perspective on Internet congestion control","author":"jay","year":"2019","journal-title":"Proc 36th Int Conf Mach Learn"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2904358"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737649"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2892046"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933761"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOMW.2012.6193510"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/WCNC.2018.8377374"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2015.7249156"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1145\/3199902.3199911"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1145\/3009824"},{"key":"ref140","first-page":"343","article-title":"PCC Vivace: Online-learning congestion control","author":"dong","year":"2018","journal-title":"Proc 15th USENIX Symp Netw Syst Design Implement (NSDI)"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1145\/3232755.3232783"},{"key":"ref142","first-page":"330","article-title":"Multi-agent reinforcement learning: Independent versus cooperative agent","author":"tam","year":"1993","journal-title":"Proc 10th Int Conf Mach Learn"},{"key":"ref143","first-page":"2137","article-title":"Learning to communicate with deep multi-agent reinforcement learning","volume":"29","author":"foerster","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref144","author":"omidshafiei","year":"2017","journal-title":"Deep decentralized multi-task multi-agent reinforcement learning under partial observability"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2014.6863129"},{"key":"ref145","article-title":"The newreno modification to TCP&#x2019;s fast recovery algorithm","author":"floyd","year":"2004","journal-title":"IETF RFC 3782"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MSPEC.2004.1309810"},{"key":"ref109","year":"2021","journal-title":"Discrete-event Network Simulator for Internet Systems"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1145\/2534169.2486020"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2015.2427091"},{"key":"ref106","year":"2021","journal-title":"Sprint IP network performance"},{"key":"ref105","first-page":"130","article-title":"A dynamic load balancing method of cloud-center based on SDN","volume":"13","author":"wang","year":"2016","journal-title":"China Commun"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1145\/3152434.3152441"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1145\/1096536.1096551"},{"key":"ref102","first-page":"1","article-title":"Discrete event simulation system","author":"varga","year":"2001","journal-title":"European Simulation Multiconference (ESM)"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1145\/1005686.1005697"},{"key":"ref112","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"lowe","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref110","year":"2012","journal-title":"The Internet Topology Zoo"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2015.2494502"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2727878"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2018.2866942"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/2699343.2699349"},{"key":"ref14","first-page":"24","article-title":"Big data analytics for network intrusion detection: A survey","volume":"7","author":"wang","year":"2017","journal-title":"International Journal of Networks and Communications"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.2016.1600317WC"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2707140"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2011.122211.00017"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3092831"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2016.2558203"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2018.2846401"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2018.2844341"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1145\/1879141.1879175"},{"key":"ref114","author":"zhang","year":"2018","journal-title":"Fully decentralized multi-agent reinforcement learning with networked agents"},{"key":"ref113","author":"foerster","year":"2017","journal-title":"Stabilising experience replay for deep multi-agent reinforcement learning"},{"key":"ref116","author":"mahmood","year":"2017","journal-title":"Multi-step off-policy learning without importance sampling ratios"},{"key":"ref115","author":"de asis","year":"2018","journal-title":"Per-decision multi-step temporal difference learning with control variates"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1145\/1851182.1851192"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2017.8057082"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2017.2672974"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1145\/2934872.2934890"}],"container-title":["IEEE Communications Surveys &amp; Tutorials"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9739\/9621320\/09507541.pdf?arnumber=9507541","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:52:25Z","timestamp":1652194345000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9507541\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":198,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/comst.2021.3102580","relation":{},"ISSN":["1553-877X","2373-745X"],"issn-type":[{"value":"1553-877X","type":"electronic"},{"value":"2373-745X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}