{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T18:02:20Z","timestamp":1761588140208,"version":"build-2065373602"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9534010","type":"proceedings-article","created":{"date-parts":[[2021,9,20]],"date-time":"2021-09-20T17:27:41Z","timestamp":1632158861000},"page":"1-8","source":"Crossref","is-referenced-by-count":8,"title":["Learning to Optimise Routing Problems using Policy Optimisation"],"prefix":"10.1109","author":[{"given":"Nasrin","family":"Sultana","sequence":"first","affiliation":[{"name":"RMIT University, AU,School of Computing Technologies"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jeffrey","family":"Chan","sequence":"additional","affiliation":[{"name":"RMIT University, AU,School of Computing Technologies"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tabinda","family":"Sarwar","sequence":"additional","affiliation":[{"name":"RMIT University, AU,School of Computing Technologies"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"A. K.","family":"Qin","sequence":"additional","affiliation":[{"name":"Swinburne University of Technology, AU,School of Software and Electrical Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/BF00202749"},{"journal-title":"Modeling Purposeful Adaptive Behavior with the Principle of Maximum Causal Entropy","year":"2010","author":"ziebart","key":"ref11"},{"key":"ref12","article-title":"Learning vehicle routing problems using policy optimisation","author":"sultana","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref13","article-title":"Understanding the impact of entropy on policy optimization","author":"ahmed","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1287\/opre.6.6.791"},{"key":"ref15","article-title":"How to evaluate machine learning approaches for combinatorial optimization: Application to the travelling salesman problem","author":"fran\u00e7ois","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref16","article-title":"An efficient graph convolutional network technique for the travelling salesman problem","author":"joshi","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref17","article-title":"Learning to optimise general tsp instances","author":"sultana","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref18","article-title":"Pomo: Policy optimization with multiple optima for reinforcement learning","author":"kwon","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref19","first-page":"6281","article-title":"Learning to perform local rewriting for combinatorial optimization","author":"chen","year":"2019","journal-title":"Advances in neural information processing systems"},{"journal-title":"A comparison of exact and heuristic algorithms to solve the travelling salesman problem","year":"2018","author":"chatting","key":"ref28"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/0-306-48056-5_16"},{"journal-title":"An Extension of the Lin-Kernighan-Helsgaun TSP Solver for Constrained Traveling Salesman and Vehicle Routing Problems","year":"2017","author":"helsgaun","key":"ref27"},{"key":"ref3","first-page":"2692","article-title":"Pointer networks","author":"vinyals","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref6","article-title":"Combinatorial optimization by graph pointer networks and hierarchical reinforcement learning","author":"ma","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-93031-2_12"},{"key":"ref8","article-title":"Attention solves your tsp","author":"kool","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref7","first-page":"9861","article-title":"Reinforcement learning for solving the vehicle routing problem","author":"nazari","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref2","first-page":"6348","article-title":"Learning combinatorial optimization algorithms over graphs","author":"khalil","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref1","article-title":"Neural combinatorial optimization with reinforcement learning","author":"bello","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref20","article-title":"A learning-based iterative method for solving vehicle routing problems","author":"lu","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref22","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume":"8","author":"ziebart","year":"2008","journal-title":"AAAI"},{"key":"ref21","article-title":"Reinforcement learning with deep energy-based policies","author":"haarnoja","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref24","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"ArXiv Preprint"},{"key":"ref23","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"journal-title":"The Traveling Salesman Problem A Computational Study","year":"2006","author":"applegate","key":"ref26"},{"key":"ref25","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09534010.pdf?arnumber=9534010","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T17:56:19Z","timestamp":1761587779000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9534010\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9534010","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}