{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T04:32:23Z","timestamp":1752985943815,"version":"3.37.3"},"reference-count":14,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,10,9]],"date-time":"2020-10-09T00:00:00Z","timestamp":1602201600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,10,9]],"date-time":"2020-10-09T00:00:00Z","timestamp":1602201600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,10,9]],"date-time":"2020-10-09T00:00:00Z","timestamp":1602201600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,10,9]]},"DOI":"10.1109\/icca51439.2020.9264518","type":"proceedings-article","created":{"date-parts":[[2020,11,30]],"date-time":"2020-11-30T21:36:44Z","timestamp":1606772204000},"page":"1332-1339","source":"Crossref","is-referenced-by-count":8,"title":["Multi-Agent Reinforcement Learning for Adaptive Routing: A Hybrid Method using Eligibility Traces"],"prefix":"10.1109","author":[{"given":"Siliang","family":"Zeng","sequence":"first","affiliation":[]},{"given":"Xingfei","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Yi","family":"Chen","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"758","article-title":"Confidence based dual reinforcement q-routing: An adaptive online network routing algorithm","volume":"99","author":"kumar","year":"1999","journal-title":"IJCAI"},{"key":"ref11","article-title":"A multi-agent, policy-gradient approach to network routing","author":"tao","year":"2001","journal-title":"Proc of the 18th Int Conf on Machine Learning"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2002.1007796"},{"key":"ref13","article-title":"An Analysis of Actor\/Critic Algorithms Using Eligibility Traces: Reinforcement Learning with Imperfect Value Function","author":"kimura","year":"1998","journal-title":"ICML"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114726"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1007\/BF00992696","article-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning","volume":"8","author":"williams","year":"1992","journal-title":"Machine Learning"},{"key":"ref5","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref8","first-page":"671","article-title":"Packet routing in dynamically changing networks: A reinforcement learning approach","author":"boyan","year":"1994","journal-title":"Advances in neural information processing systems"},{"key":"ref7","first-page":"1008","article-title":"Actor-critic algorithms","author":"konda","year":"2000","journal-title":"Advances in neural information processing systems"},{"article-title":"Playing atari with deep reinforcement learning","year":"2013","author":"mnih","key":"ref2"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3243173"},{"key":"ref9","first-page":"945","article-title":"Predictive Q-routing: A memory-based reinforcement learning approach to adaptive traffic control","author":"choi","year":"1996","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2020 IEEE 16th International Conference on Control & Automation (ICCA)","start":{"date-parts":[[2020,10,9]]},"location":"Singapore","end":{"date-parts":[[2020,10,11]]}},"container-title":["2020 IEEE 16th International Conference on Control &amp; Automation (ICCA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9264304\/9264308\/09264518.pdf?arnumber=9264518","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T00:07:40Z","timestamp":1656374860000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9264518\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,9]]},"references-count":14,"URL":"https:\/\/doi.org\/10.1109\/icca51439.2020.9264518","relation":{},"subject":[],"published":{"date-parts":[[2020,10,9]]}}}