{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T23:12:55Z","timestamp":1768518775915,"version":"3.49.0"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61503217"],"award-info":[{"award-number":["61503217"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2019]]},"DOI":"10.1109\/access.2019.2932047","type":"journal-article","created":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T15:56:40Z","timestamp":1564675000000},"page":"107744-107756","source":"Crossref","is-referenced-by-count":10,"title":["Which Channel to Ask My Question?: Personalized Customer Service Request Stream Routing Using Deep Reinforcement Learning"],"prefix":"10.1109","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4969-2098","authenticated-orcid":false,"given":"Zining","family":"Liu","sequence":"first","affiliation":[]},{"given":"Chong","family":"Long","sequence":"additional","affiliation":[]},{"given":"Xiaolu","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Zehong","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yafang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ENERGYCON.2016.7514015"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2017.04.013"},{"key":"ref33","article-title":"Meta-learning with temporal convolutions","author":"mishra","year":"2017","journal-title":"arXiv 1707 03141"},{"key":"ref32","article-title":"Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation","author":"wu","year":"2017","journal-title":"arXiv 1708 05144"},{"key":"ref31","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref30","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv 1509 02971"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/72.238311"},{"key":"ref36","article-title":"The mirage of action-dependent baselines in reinforcement learning","author":"tucker","year":"2018","journal-title":"arXiv 1802 10031"},{"key":"ref35","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"arXiv 1801 01290"},{"key":"ref34","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"2018","journal-title":"rXiv 1802 09477"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2897028"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.12783\/dtcse\/aiea2017\/15003"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2818678"},{"key":"ref12","article-title":"A long short-term memory recurrent neural network framework for network traffic matrix prediction","author":"azzouni","year":"2017","journal-title":"arXiv 1705 05690"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2018.2879361"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2018.8485853"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.424"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2900076"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2019.2894437"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3211954.3211957"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/0921-8890(95)00026-C"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2008.02.003"},{"key":"ref4","article-title":"Prioritized sequence experience replay","author":"schaul","year":"2015","journal-title":"arXiv 1905 12726"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref3","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","author":"wang","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v074.i11"},{"key":"ref29","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc 33nd Int Conf Mach Learn"},{"key":"ref5","first-page":"1167","article-title":"Classes of multiagent Q-learning dynamics with epsilon-greedy exploration","author":"wunder","year":"2010","journal-title":"Proc ICML"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.2018.1700417"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2018.1700293"},{"key":"ref2","first-page":"2094","article-title":"Deep reinforcement learning with double Q-learning","author":"van hasselt","year":"2016","journal-title":"Proc AAAI"},{"key":"ref9","article-title":"Imagetext dual neural network with decision strategy for small-sample image classification","volume":"328","author":"zhu","year":"2018","journal-title":"Neurocomputing"},{"key":"ref1","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2016.2601622"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0907-4"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"1309","DOI":"10.1109\/TCSVT.2014.2381471","article-title":"Background prior-based salient object detection via deep reconstruction residual","volume":"25","author":"han","year":"2015","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-9473(01)00065-2"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2018.02.099"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1023\/A:1010933404324"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2567393"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-44668-0_93"},{"key":"ref26","article-title":"Playing atari games with deep reinforcement learning and human checkpoint replay","author":"hosu","year":"2016","journal-title":"arXiv 1607 05077"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2016.2576598"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8600701\/08784156.pdf?arnumber=8784156","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T11:32:57Z","timestamp":1641987177000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8784156\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/access.2019.2932047","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]}}}