{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T17:51:17Z","timestamp":1772560277946,"version":"3.50.1"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,5,4]],"date-time":"2022-05-04T00:00:00Z","timestamp":1651622400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,5,4]],"date-time":"2022-05-04T00:00:00Z","timestamp":1651622400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2021YFA1000403"],"award-info":[{"award-number":["2021YFA1000403"]}]},{"DOI":"10.13039\/501100010903","name":"Key Programme","doi-asserted-by":"publisher","award":["11991022"],"award-info":[{"award-number":["11991022"]}],"id":[{"id":"10.13039\/501100010903","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010903","name":"Key Programme","doi-asserted-by":"publisher","award":["U19B2040"],"award-info":[{"award-number":["U19B2040"]}],"id":[{"id":"10.13039\/501100010903","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s10489-022-03453-z","type":"journal-article","created":{"date-parts":[[2022,5,4]],"date-time":"2022-05-04T22:03:42Z","timestamp":1651701822000},"page":"2010-2025","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Solving uncapacitated P-Median problem with reinforcement learning assisted by graph attention networks"],"prefix":"10.1007","volume":"53","author":[{"given":"Chenguang","family":"Wang","sequence":"first","affiliation":[]},{"given":"Congying","family":"Han","sequence":"additional","affiliation":[]},{"given":"Tiande","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Man","family":"Ding","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,5,4]]},"reference":[{"key":"3453_CR1","doi-asserted-by":"crossref","unstructured":"Guo T, Han C, Tang S, Ding M (2019) Solving combinatorial problems with machine learning methods. In: Nonlinear Combinatorial Optimization. Springer, pp 207\u2013229","DOI":"10.1007\/978-3-030-16194-1_9"},{"key":"3453_CR2","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: An introduction. MIT press"},{"key":"3453_CR3","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, pp 5998\u20136008"},{"issue":"3-4","key":"3453_CR4","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine learning 8(3-4):229\u2013256","journal-title":"Machine learning"},{"key":"3453_CR5","unstructured":"Gurobi Optimization LLC (2021) Gurobi Optimizer Reference Manual. https:\/\/www.gurobi.com"},{"key":"3453_CR6","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1016\/j.dib.2018.01.008","volume":"17","author":"M Cebecauer","year":"2018","unstructured":"Cebecauer M, Buzna L (2018) Large-scale test data set for location problems. Data in brief 17:267\u2013274","journal-title":"Data in brief"},{"issue":"7540","key":"3453_CR7","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. nature 518(7540):529\u2013533","journal-title":"nature"},{"issue":"3-4","key":"3453_CR8","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins CJCH, Dayan P (1992) Q-learning. Machine learning 8(3-4):279\u2013292","journal-title":"Machine learning"},{"key":"3453_CR9","unstructured":"Konda VR, Tsitsiklis JN (2000) Actor-critic algorithms. In: Advances in neural information processing systems, pp 1008\u20131014"},{"key":"3453_CR10","unstructured":"Silver D, Lever G, Heess N, Degris T, Wierstra D, Riedmiller M (2014) Deterministic policy gradient algorithms. In: International conference on machine learning, PMLR, pp 387\u2013395"},{"key":"3453_CR11","unstructured":"Schulman J, Levine S, Abbeel P, Jordan M, Moritz P (2015) Trust region policy optimization. In: International conference on machine learning, PMLR, pp 1889\u20131897"},{"key":"3453_CR12","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. arXiv:1509.02971"},{"key":"3453_CR13","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv:1707.06347"},{"key":"3453_CR14","unstructured":"Babaeizadeh M, Frosio I, Tyree S, Clemons J, Kautz J (2016) Reinforcement learning through asynchronous advantage actor-critic on a gpu. arXiv:1611.06256"},{"issue":"1","key":"3453_CR15","first-page":"1334","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine S, Finn C, Darrell T, Abbeel P (2016) End-to-end training of deep visuomotor policies. The Journal of Machine Learning Research 17(1):1334\u20131373","journal-title":"The Journal of Machine Learning Research"},{"issue":"3","key":"3453_CR16","doi-asserted-by":"publisher","first-page":"653","DOI":"10.1109\/TNNLS.2016.2522401","volume":"28","author":"Y Deng","year":"2016","unstructured":"Deng Y, Bao F, Kong Y, Ren Z, Dai Q (2016) Deep direct reinforcement learning for financial signal representation and trading. IEEE transactions on neural networks and learning systems 28(3):653\u2013664","journal-title":"IEEE transactions on neural networks and learning systems"},{"key":"3453_CR17","doi-asserted-by":"crossref","unstructured":"Zheng G, Zhang F, Zheng Z, Xiang Y, Yuan NJ, Xie X, Li Z (2018) Drn: A deep reinforcement learning framework for news recommendation. In: Proceedings of the 2018 World Wide Web Conference, pp 167\u2013176","DOI":"10.1145\/3178876.3185994"},{"issue":"7587","key":"3453_CR18","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, Van Den Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M et al (2016) Mastering the game of go with deep neural networks and tree search. nature 529(7587):484\u2013489","journal-title":"nature"},{"issue":"6419","key":"3453_CR19","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver D, Hubert T, Schrittwieser J, Antonoglou I, Lai M, Guez A, Lanctot M, Sifre L, Kumaran D, Graepel T et al (2018) A general reinforcement learning algorithm that masters chess, shogi, and go through self-play. Science 362(6419):1140\u20131144","journal-title":"Science"},{"issue":"7839","key":"3453_CR20","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","volume":"588","author":"J Schrittwieser","year":"2020","unstructured":"Schrittwieser J, Antonoglou I, Hubert T, Simonyan K, Sifre L, Schmitt S, Guez A, Lockhart E, Hassabis D, Graepel T et al (2020) Mastering atari, go, chess and shogi by planning with a learned model. Nature 588(7839):604\u2013609","journal-title":"Nature"},{"key":"3453_CR21","unstructured":"Jin C, Allen-Zhu Z, Bubeck S, Jordan MI (2018) Is q-learning provably efficient?. arXiv:1807.03765"},{"key":"3453_CR22","unstructured":"Jin C, Liu Q, Miryoosefi S (2021) Bellman eluder dimension: New rich classes of rl problems, and sample-efficient algorithms. arXiv:2102.00815"},{"key":"3453_CR23","unstructured":"Duan Y, Jin C, Li Z (2021) Risk bounds and rademacher complexity in batch reinforcement learning. arXiv:2103.13883"},{"issue":"6","key":"3453_CR24","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","volume":"34","author":"K Arulkumaran","year":"2017","unstructured":"Arulkumaran K, Deisenroth MP, Brundage M, Bharath AA (2017) Deep reinforcement learning: A brief survey. IEEE Signal Proc Mag 34(6):26\u201338","journal-title":"IEEE Signal Proc Mag"},{"key":"3453_CR25","doi-asserted-by":"crossref","unstructured":"Mousavi SS, Schukat M, Howley E (2016) Deep reinforcement learning: an overview. In: Proceedings of SAI Intelligent Systems Conference, Springer, pp 426\u2013440","DOI":"10.1007\/978-3-319-56991-8_32"},{"issue":"9","key":"3453_CR26","doi-asserted-by":"publisher","first-page":"3826","DOI":"10.1109\/TCYB.2020.2977374","volume":"50","author":"TT Nguyen","year":"2020","unstructured":"Nguyen TT, Nguyen ND, Nahavandi S (2020) Deep reinforcement learning for multiagent systems: A review of challenges, solutions, and applications. IEEE transactions on cybernetics 50(9):3826\u20133839","journal-title":"IEEE transactions on cybernetics"},{"issue":"6","key":"3453_CR27","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1109\/TETCI.2020.3007905","volume":"4","author":"M Asim","year":"2020","unstructured":"Asim M, Wang Y, Wang K, Huang P-Q (2020) A review on computational intelligence techniques in cloud and edge computing. IEEE Transactions on Emerging Topics in Computational Intelligence 4 (6):742\u2013763","journal-title":"IEEE Transactions on Emerging Topics in Computational Intelligence"},{"key":"3453_CR28","unstructured":"Vinyals O, Fortunato M, Jaitly N (2015) Pointer networks. In: Advances in neural information processing systems, pp 2692\u20132700"},{"key":"3453_CR29","unstructured":"Lu H, Zhang X, Yang S (2019) A learning-based iterative method for solving vehicle routing problems. In: International Conference on Learning Representations"},{"key":"3453_CR30","unstructured":"Manchanda S, Mittal A, Dhawan A, Medya S, Ranu S, Singh A (2019) Learning heuristics over large graphs via deep reinforcement learning. arXiv:1903.03332"},{"key":"3453_CR31","doi-asserted-by":"crossref","unstructured":"Mazyavkina N, Sviridov S, Ivanov S, Burnaev E (2021) Reinforcement learning for combinatorial optimization: A survey. Computers & Operations Research, p 105400","DOI":"10.1016\/j.cor.2021.105400"},{"key":"3453_CR32","doi-asserted-by":"crossref","unstructured":"Cappart Q, Ch\u00e9telat D, Khalil E, Lodi A, Morris C, Veli\u010dkovi\u0107 P (2021) Combinatorial optimization and reasoning with graph neural networks. arXiv:2102.09544","DOI":"10.24963\/ijcai.2021\/595"},{"key":"3453_CR33","first-page":"22","volume":"1050","author":"A Nowak","year":"2017","unstructured":"Nowak A, Villar S, Bandeira AS, Bruna J (2017) A note on learning algorithms for quadratic assignment with graph neural networks. stat 1050:22","journal-title":"stat"},{"key":"3453_CR34","unstructured":"Kool W, Van Hoof H, Welling M (2019) Attention, learn to solve routing problems!. 7th International Conference on Learning Representations, ICLR 2019, pp 1\u201325. 1803.08475"},{"key":"3453_CR35","unstructured":"Veli\u010dkovi\u0107 P, Cucurull G, Casanova A, Romero A, Lio P, Bengio Y (2017) Graph attention networks. arXiv:1710.10903"},{"key":"3453_CR36","doi-asserted-by":"crossref","unstructured":"Wu Y, Song W, Cao Z, Zhang J, Lim A (2021) Learning improvement heuristics for solving routing problems. IEEE Transactions on Neural Networks and Learning Systems","DOI":"10.1109\/TNNLS.2021.3068828"},{"key":"3453_CR37","doi-asserted-by":"crossref","unstructured":"Fu Z-H, Qiu K-B, Zha H (2020) Generalize a small pre-trained model to arbitrarily large tsp instances. arXiv:2012.10658","DOI":"10.1609\/aaai.v35i8.16916"},{"key":"3453_CR38","doi-asserted-by":"crossref","unstructured":"Kool W, van Hoof H, Gromicho J, Welling M (2021) Deep policy dynamic programming for vehicle routing problems. arXiv:2102.11756","DOI":"10.1007\/978-3-031-08011-1_14"},{"issue":"4","key":"3453_CR39","doi-asserted-by":"publisher","first-page":"100023","DOI":"10.1016\/j.ejtl.2020.100023","volume":"9","author":"A Lodi","year":"2020","unstructured":"Lodi A, Mossina L, Rachelson E (2020) Learning to handle parameter perturbations in combinatorial optimization: an application to facility location. EURO Journal on Transportation and Logistics 9 (4):100023","journal-title":"EURO Journal on Transportation and Logistics"},{"key":"3453_CR40","unstructured":"Gamrath G, Anderson D, Bestuzheva K, Chen W-K, Eifler L, Gasse M, Gemander P, Gleixner A, Gottwald L, Halbig K et al (2020) The scip optimization suite 7.0"},{"issue":"2","key":"3453_CR41","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1016\/j.ejor.2020.07.063","volume":"290","author":"Y Bengio","year":"2021","unstructured":"Bengio Y, Lodi A, Prouvost A (2021) Machine learning for combinatorial optimization: a methodological tour d\u2019horizon. Eur J Oper Res 290(2):405\u2013421","journal-title":"Eur J Oper Res"},{"key":"3453_CR42","doi-asserted-by":"publisher","first-page":"120388","DOI":"10.1109\/ACCESS.2020.3004964","volume":"8","author":"N Vesselinova","year":"2020","unstructured":"Vesselinova N, Steinert R, Perez-Ramirez DF, Boman M (2020) Learning combinatorial optimization on graphs: A survey with applications to networking. IEEE Access 8:120388\u2013120416","journal-title":"IEEE Access"},{"issue":"2","key":"3453_CR43","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1007\/s41019-021-00155-3","volume":"6","author":"Y Peng","year":"2021","unstructured":"Peng Y, Choi B, Xu J (2021) Graph learning for combinatorial optimization: A survey of state-of-the-art. Data Science and Engineering 6(2):119\u2013141","journal-title":"Data Science and Engineering"},{"key":"3453_CR44","unstructured":"Shazeer N, Lan Z, Cheng Y, Ding N, Hou L (2020) Talking-heads attention. arXiv:2003.02436"},{"key":"3453_CR45","unstructured":"Joshi CK, Cappart Q, Rousseau L-M, Laurent T, Bresson X (2020) Learning tsp requires rethinking generalization. arXiv:2006.07054"},{"key":"3453_CR46","unstructured":"Kingma DP, Ba J (2014) Adam: A method for stochastic optimization. arXiv:1412.6980"},{"issue":"2605","key":"3453_CR47","first-page":"2579","volume":"9","author":"VD Maaten","year":"2008","unstructured":"Laurens, Maaten VD, Geoffrey H (2008) Visualizing data using t-sne. J Mach Learn Res 9 (2605):2579\u20132605","journal-title":"J Mach Learn Res"},{"key":"3453_CR48","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: Accelerating deep network training by reducing internal covariate shift. arXiv:1502.03167"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03453-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-03453-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03453-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,4]],"date-time":"2023-01-04T04:51:53Z","timestamp":1672807913000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-03453-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,4]]},"references-count":48,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["3453"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-03453-z","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,5,4]]},"assertion":[{"value":"26 February 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 May 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}