{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T23:03:43Z","timestamp":1773702223750,"version":"3.50.1"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030876258","type":"print"},{"value":"9783030876265","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-87626-5_1","type":"book-chapter","created":{"date-parts":[[2021,9,29]],"date-time":"2021-09-29T06:14:35Z","timestamp":1632896075000},"page":"3-16","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["RP-DQN: An Application of Q-Learning to Vehicle Routing Problems"],"prefix":"10.1007","author":[{"given":"Ahmad","family":"Bdeir","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Simon","family":"Boeder","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tim","family":"Dernedde","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kirill","family":"Tkachuk","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jonas K.","family":"Falkner","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lars","family":"Schmidt-Thieme","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,9,30]]},"reference":[{"key":"1_CR1","unstructured":"Bello, I., Pham, H., Le, Q.V., Norouzi, M., Bengio, S.: Neural combinatorial optimization with reinforcement learning. CoRR (2016). http:\/\/arxiv.org\/abs\/1611.09940"},{"key":"1_CR2","unstructured":"Chen, X., Tian, Y.: Learning to perform local rewriting for combinatorial optimization. In: Advances in Neural Information Processing Systems, vol. 32. Curran Associates, Inc. (2019)"},{"key":"1_CR3","unstructured":"Dai, H., Dai, B., Song, L.: Discriminative embeddings of latent variable models for structured data. In: Proceedings of the 33rd International Conference on International Conference on Machine Learning, ICML 2016, vol. 48, pp. 2702\u20132711 (2016)"},{"key":"1_CR4","unstructured":"Delarue, A., Anderson, R., Tjandraatmadja, C.: Reinforcement learning with combinatorial actions: an application to vehicle routing. In: Advances in Neural Information Processing Systems, vol. 33, pp. 609\u2013620. Curran Associates, Inc. (2020)"},{"key":"1_CR5","unstructured":"Falkner, J.K., Schmidt-Thieme, L.: Learning to solve vehicle routing problems with time windows through joint attention (2020). http:\/\/arxiv.org\/abs\/2006.09100"},{"key":"1_CR6","unstructured":"Gurobi Optimization, LLC: Gurobi optimizer reference manual (2021)"},{"key":"1_CR7","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: ICML (2018)"},{"key":"1_CR8","doi-asserted-by":"crossref","unstructured":"van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. In: Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence, AAAI 2016, pp. 2094\u20132100. AAAI Press (2016)","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"1_CR9","doi-asserted-by":"publisher","unstructured":"Helsgaun, K.: An extension of the Lin-Kernighan-Helsgaun TSP solver for constrained traveling salesman and vehicle routing problems (2017). https:\/\/doi.org\/10.13140\/RG.2.2.25569.40807","DOI":"10.13140\/RG.2.2.25569.40807"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Hessel, M., et al.: Rainbow: combining improvements in deep reinforcement learning. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"1_CR11","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: Proceedings of the 32nd International Conference on Machine Learning, Proceedings of Machine Learning Research, Lille, France, vol. 37, pp. 448\u2013456. PMLR (2015)"},{"key":"1_CR12","unstructured":"Joshi, C.K., Laurent, T., Bresson, X.: An efficient graph convolutional network technique for the travelling salesman problem. CoRR (2019). http:\/\/arxiv.org\/abs\/1906.01227"},{"key":"1_CR13","unstructured":"Khalil, E., Dai, H., Zhang, Y., Dilkina, B., Song, L.: Learning combinatorial optimization algorithms over graphs. In: Advances in Neural Information Processing Systems, vol. 30. Curran Associates, Inc. (2017)"},{"key":"1_CR14","unstructured":"Kool, W., van Hoof, H., Gromicho, J., Welling, M.: Deep policy dynamic programming for vehicle routing problems (2021). http:\/\/arxiv.org\/abs\/2102.11756"},{"key":"1_CR15","unstructured":"Kool, W., van Hoof, H., Welling, M.: Attention, learn to solve routing problems! In: International Conference on Learning Representations (2019)"},{"issue":"2","key":"1_CR16","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1287\/opre.21.2.498","volume":"21","author":"S Lin","year":"1973","unstructured":"Lin, S., Kernighan, B.W.: An effective heuristic algorithm for the Traveling-Salesman problem. Oper. Res. 21(2), 498\u2013516 (1973). https:\/\/doi.org\/10.1287\/opre.21.2.498","journal-title":"Oper. Res."},{"key":"1_CR17","doi-asserted-by":"publisher","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015). https:\/\/doi.org\/10.1038\/nature14236. ISSN 00280836","DOI":"10.1038\/nature14236"},{"key":"1_CR18","unstructured":"Nazari, M.R., Oroojlooy, A., Snyder, L., Takac, M.: Reinforcement learning for solving the vehicle routing problem. In: Advances in Neural Information Processing Systems, vol. 31. Curran Associates, Inc. (2018)"},{"key":"1_CR19","unstructured":"Perron, L., Furnon, V.: OR-Tools 7.2 (2019)"},{"key":"1_CR20","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1007\/978-1-4020-9688-4_3","volume-title":"Fundamental Problems in Computing","author":"DJ Rosenkrantz","year":"2009","unstructured":"Rosenkrantz, D.J., Stearns, R.E., Lewis, P.M.: An analysis of several heuristics for the Traveling Salesman problem. In: Ravi, S.S., Shukla, S.K. (eds.) Fundamental Problems in Computing, pp. 45\u201369. Springer, Dordrecht (2009). https:\/\/doi.org\/10.1007\/978-1-4020-9688-4_3"},{"key":"1_CR21","unstructured":"Schaul, T., Quan, J., Antonoglou, I., Silver, D.: Prioritized experience replay (2015). http:\/\/arxiv.org\/abs\/1511.05952"},{"key":"1_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1007\/3-540-49481-2_30","volume-title":"Principles and Practice of Constraint Programming \u2014 CP98","author":"P Shaw","year":"1998","unstructured":"Shaw, P.: Using constraint programming and local search methods to solve vehicle routing problems. In: Maher, M., Puget, J.-F. (eds.) CP 1998. LNCS, vol. 1520, pp. 417\u2013431. Springer, Heidelberg (1998). https:\/\/doi.org\/10.1007\/3-540-49481-2_30"},{"key":"1_CR23","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, 2nd edn. The MIT Press (2018)"},{"key":"1_CR24","doi-asserted-by":"crossref","unstructured":"Toth, P., Vigo, D.: Vehicle Routing: Problems, Methods, and Applications, 2nd edn. No. 18 in MOS-SIAM Series on Optimization, SIAM (2014). ISBN 9781611973587","DOI":"10.1137\/1.9781611973594"},{"key":"1_CR25","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30, Curran Associates, Inc. (2017)"},{"key":"1_CR26","unstructured":"Vinyals, O., Fortunato, M., Jaitly, N.: Pointer networks. In: Advances in Neural Information Processing Systems, vol. 28, Curran Associates, Inc. (2015)"},{"issue":"2","key":"1_CR27","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/S0377-2217(98)00099-X","volume":"113","author":"C Voudouris","year":"1999","unstructured":"Voudouris, C., Tsang, E.: Guided local search and its application to the Traveling Salesman problem. Eur. J. Oper. Res. 113(2), 469\u2013499 (1999). https:\/\/doi.org\/10.1016\/S0377-2217(98)00099-X","journal-title":"Eur. J. Oper. Res."},{"issue":"3\u20134","key":"1_CR28","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8(3\u20134), 229\u2013256 (1992)","journal-title":"Mach. Learn."},{"key":"1_CR29","unstructured":"Wu, Y., Song, W., Cao, Z., Zhang, J., Lim, A.: Learning improvement heuristics for solving routing problems (2020). http:\/\/arxiv.org\/abs\/1912.05784"},{"key":"1_CR30","doi-asserted-by":"publisher","unstructured":"Zhao, J., Mao, M., Zhao, X., Zou, J.: A hybrid of deep reinforcement learning and local search for the vehicle routing problems. IEEE Trans. Intell. Trans. Syst. 1\u201311 (2020). https:\/\/doi.org\/10.1109\/TITS.2020.3003163","DOI":"10.1109\/TITS.2020.3003163"}],"container-title":["Lecture Notes in Computer Science","KI 2021: Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-87626-5_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T18:21:57Z","timestamp":1673374917000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-87626-5_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030876258","9783030876265"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-87626-5_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"30 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"German Conference on Artificial Intelligence (K\u00fcnstliche Intelligenz)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 October 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"44","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ki2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ki2021.uni-luebeck.de\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"59","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}