{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T08:10:08Z","timestamp":1751098208807,"version":"3.41.0"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031959752","type":"print"},{"value":"9783031959769","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-95976-9_9","type":"book-chapter","created":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T07:53:41Z","timestamp":1751097221000},"page":"137-154","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement Learning-Based Heuristics to\u00a0Guide Domain-Independent Dynamic Programming"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2808-6056","authenticated-orcid":false,"given":"Minori","family":"Narita","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3753-1644","authenticated-orcid":false,"given":"Ryo","family":"Kuroiwa","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4656-8908","authenticated-orcid":false,"given":"J. 
Christopher","family":"Beck","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,29]]},"reference":[{"issue":"8","key":"9_CR1","doi-asserted-by":"publisher","first-page":"356","DOI":"10.1038\/s42256-019-0070-z","volume":"1","author":"F Agostinelli","year":"2019","unstructured":"Agostinelli, F., McAleer, S., Shmakov, A., Baldi, P.: Solving the Rubik\u2019s cube with deep reinforcement learning and search. Nat. Mach. Intell. 1(8), 356\u2013363 (2019). https:\/\/doi.org\/10.1038\/s42256-019-0070-z","journal-title":"Nat. Mach. Intell."},{"issue":"6","key":"9_CR2","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","volume":"34","author":"K Arulkumaran","year":"2017","unstructured":"Arulkumaran, K., Deisenroth, M.P., Brundage, M., Bharath, A.A.: Deep reinforcement learning: a brief survey. IEEE Signal Process. Mag. 34(6), 26\u201338 (2017). https:\/\/doi.org\/10.1109\/MSP.2017.2743240","journal-title":"IEEE Signal Process. Mag."},{"issue":"5","key":"9_CR3","doi-asserted-by":"publisher","first-page":"618","DOI":"10.1016\/j.orl.2008.04.006","volume":"36","author":"A Atamt\u00fcrk","year":"2008","unstructured":"Atamt\u00fcrk, A., Narayanan, V.: Polymatroids and mean-risk minimization in discrete optimization. Oper. Res. Lett. 36(5), 618\u2013622 (2008). https:\/\/doi.org\/10.1016\/j.orl.2008.04.006","journal-title":"Oper. Res. Lett."},{"key":"9_CR4","volume-title":"Dynamic Programming","author":"R Bellman","year":"1957","unstructured":"Bellman, R.: Dynamic Programming. Princeton University Press, Princeton (1957)"},{"key":"9_CR5","unstructured":"Bello, I., Pham, H., Le, Q.V., Norouzi, M., Bengio, S.: Neural combinatorial optimization with reinforcement learning. In: The Fifth International Conference on Learning Representations (2017). 
https:\/\/openreview.net\/forum?id=rJY3vK9eg"},{"key":"9_CR6","doi-asserted-by":"publisher","unstructured":"Boisvert, L., Verhaeghe, H., Cappart, Q.: Towards a generic representation of combinatorial problems for learning-based approaches. In: International Conference on the Integration of Constraint Programming, Artificial Intelligence, and Operations Research, pp. 99\u2013108. Springer, Cham (2024). https:\/\/doi.org\/10.1007\/978-3-031-60597-0_7","DOI":"10.1007\/978-3-031-60597-0_7"},{"issue":"130","key":"9_CR7","first-page":"1","volume":"24","author":"Q Cappart","year":"2023","unstructured":"Cappart, Q., Ch\u00e9telat, D., Khalil, E.B., Lodi, A., Morris, C., Veli\u010dkovi\u0107, P.: Combinatorial optimization and reasoning with graph neural networks. J. Mach. Learn. Res. 24(130), 1\u201361 (2023)","journal-title":"J. Mach. Learn. Res."},{"key":"9_CR8","doi-asserted-by":"publisher","unstructured":"Cappart, Q., Moisan, T., Rousseau, L.M., Pr\u00e9mont-Schwarz, I., Cire, A.A.: Combining reinforcement learning and constraint programming for combinatorial optimization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 3677\u20133687. AAAI Press (2021). https:\/\/doi.org\/10.1609\/aaai.v35i5.16484","DOI":"10.1609\/aaai.v35i5.16484"},{"key":"9_CR9","unstructured":"Dai, H., Khalil, E.B., Zhang, Y., Dilkina, B., Song, L.: Learning combinatorial optimization algorithms over graphs. In: Advances in Neural Information Processing Systems, pp. 6351\u20136361. Curran Associates Inc. (2017)"},{"issue":"1","key":"9_CR10","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1287\/opre.4.1.61","volume":"4","author":"MM Flood","year":"1956","unstructured":"Flood, M.M.: The traveling-salesman problem. Oper. Res. 4(1), 61\u201375 (1956). https:\/\/doi.org\/10.1287\/opre.4.1.61","journal-title":"Oper. 
Res."},{"key":"9_CR11","doi-asserted-by":"publisher","unstructured":"Fu, Z.H., Qiu, K.B., Zha, H.: Generalize a small pre-trained model to arbitrarily large TSP instances. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 7474\u20137482 (2021). https:\/\/doi.org\/10.1609\/aaai.v35i8.16916","DOI":"10.1609\/aaai.v35i8.16916"},{"key":"9_CR12","doi-asserted-by":"publisher","unstructured":"Gehring, C., et al.: Reinforcement learning for classical planning: viewing heuristics as dense reward generators. In: Proceedings of the International Conference on Automated Planning and Scheduling, vol.\u00a032, pp. 588\u2013596 (2022). https:\/\/doi.org\/10.1609\/icaps.v32i1.19846","DOI":"10.1609\/icaps.v32i1.19846"},{"key":"9_CR13","unstructured":"Huang, S., Onta\u00f1\u00f3n, S.: A closer look at invalid action masking in policy gradient algorithms. arXiv preprint arXiv:2006.14171 (2020)"},{"key":"9_CR14","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: a survey. J. Artif. Intell. Res. 4, 237\u2013285 (1996). https:\/\/doi.org\/10.1613\/jair.301","journal-title":"J. Artif. Intell. Res."},{"key":"9_CR15","doi-asserted-by":"publisher","unstructured":"Kool, W., van Hoof, H., Gromicho, J., Welling, M.: Deep policy dynamic programming for vehicle routing problems. In: International Conference on Integration of Constraint Programming, Artificial Intelligence, and Operations Research, pp. 190\u2013213. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-08011-1_14","DOI":"10.1007\/978-3-031-08011-1_14"},{"key":"9_CR16","unstructured":"Kool, W., van Hoof, H., Welling, M.: Attention, learn to solve routing problems! In: International Conference on Learning Representations (2019). 
https:\/\/openreview.net\/forum?id=ByxBFsRqYm"},{"key":"9_CR17","doi-asserted-by":"publisher","unstructured":"Kuroiwa, R., Beck, J.C.: Domain-independent dynamic programming: generic state space search for combinatorial optimization. In: Proceedings of the 33rd International Conference on Automated Planning and Scheduling (ICAPS), vol.\u00a033, pp. 236\u2013244. AAAI Press (2023). https:\/\/doi.org\/10.1609\/icaps.v33i1.27200","DOI":"10.1609\/icaps.v33i1.27200"},{"key":"9_CR18","doi-asserted-by":"publisher","unstructured":"Kuroiwa, R., Beck, J.C.: Solving domain-independent dynamic programming problems with anytime heuristic search. In: Proceedings of the 33rd International Conference on Automated Planning and Scheduling (ICAPS). AAAI Press (2023). https:\/\/doi.org\/10.1609\/icaps.v33i1.27201","DOI":"10.1609\/icaps.v33i1.27201"},{"key":"9_CR19","unstructured":"Kuroiwa, R., Beck, J.C.: Domain-independent dynamic programming. arXiv preprint arXiv:2401.13883 (2024)"},{"key":"9_CR20","unstructured":"Lee, J., Lee, Y., Kim, J., Kosiorek, A., Choi, S., Teh, Y.W.: Set transformer: a framework for attention-based permutation-invariant neural networks. In: Proceedings of the 36th International Conference on Machine Learning, vol.\u00a097, pp. 3744\u20133753. PMLR (2019). https:\/\/proceedings.mlr.press\/v97\/lee19d.html"},{"key":"9_CR21","unstructured":"Li, S., Wang, R., Tang, M., Zhang, C.: Hierarchical reinforcement learning with advantage-based auxiliary rewards. In: Advances in Neural Information Processing Systems. Curran Associates Inc., Red Hook, NY, USA (2019)"},{"key":"9_CR22","volume-title":"Knapsack Problems: Algorithms and Computer Implementations","author":"S Martello","year":"1990","unstructured":"Martello, S., Toth, P.: Knapsack Problems: Algorithms and Computer Implementations. Wiley, Hoboken (1990)"},{"key":"9_CR23","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. 
In: Proceedings of the 33rd International Conference on Machine Learning, vol.\u00a048, pp. 1928\u20131937. PMLR (2016). https:\/\/proceedings.mlr.press\/v48\/mniha16.html"},{"key":"9_CR24","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015). https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"9_CR25","unstructured":"Nazari, M., Oroojlooy, A., Tak\u00e1\u010d, M., Snyder, L.V.: Reinforcement learning for solving the vehicle routing problem. In: Advances in Neural Information Processing Systems, vol.\u00a031, pp. 9861\u20139871. Curran Associates, Inc. (2018)"},{"key":"9_CR26","unstructured":"Orseau, L., Lelis, L., Lattimore, T., Weber, T.: Single-agent policy tree search with guarantees. In: Advances in Neural Information Processing Systems, vol.\u00a031. Curran Associates, Inc. (2018)"},{"key":"9_CR27","doi-asserted-by":"publisher","unstructured":"Orseau, L., Lelis, L.H.: Policy-guided heuristic search with guarantees. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 12382\u201312390. AAAI Press (2021). https:\/\/doi.org\/10.1609\/aaai.v35i14.17469","DOI":"10.1609\/aaai.v35i14.17469"},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Puterman, M.L.: Markov decision processes. In: Handbooks in Operations Research and Management Science, vol.\u00a02, pp. 331\u2013434. Elsevier (1990)","DOI":"10.1016\/S0927-0507(05)80172-0"},{"key":"9_CR29","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1007\/BF02022044","volume":"4","author":"MW Savelsbergh","year":"1985","unstructured":"Savelsbergh, M.W.: Local search in routing problems with time windows. Ann. Oper. Res. 4, 285\u2013305 (1985). https:\/\/doi.org\/10.1007\/BF02022044","journal-title":"Ann. Oper. 
Res."},{"key":"9_CR30","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: Proceedings of the 32nd International Conference on Machine Learning, vol.\u00a037, pp. 1889\u20131897. PMLR (2015). https:\/\/proceedings.mlr.press\/v37\/schulman15.html"},{"key":"9_CR31","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"9_CR32","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529, 484\u2013503 (2016). https:\/\/doi.org\/10.1038\/nature16961","journal-title":"Nature"},{"key":"9_CR33","unstructured":"Silver, D., et al.: Mastering chess and shogi by self-play with a general reinforcement learning algorithm. arXiv preprint arXiv:1712.01815 (2017)"},{"key":"9_CR34","unstructured":"Sun, Z., Yang, Y.: Difusco: graph-based diffusion solvers for combinatorial optimization. In: Advances in Neural Information Processing Systems, vol.\u00a036, pp. 3706\u20133731. Curran Associates Inc. (2023)"},{"key":"9_CR35","unstructured":"Sutton, R.S., Barto, A.G.: The reinforcement learning problem. In: Reinforcement Learning: An Introduction, pp. 51\u201385. MIT Press (1998)"},{"key":"9_CR36","unstructured":"Veli\u010dkovi\u0107, P., Cucurull, G., Casanova, A., Romero, A., Li\u00f2, P., Bengio, Y.: Graph attention networks. In: International Conference on Learning Representations (2018). https:\/\/openreview.net\/forum?id=rJXMpikCZ"},{"key":"9_CR37","unstructured":"Zaheer, M., Kottur, S., Ravanbakhsh, S., Poczos, B., Salakhutdinov, R.R., Smola, A.J.: Deep sets. In: Advances in Neural Information Processing Systems, vol.\u00a030, pp. 3394\u20133404. Curran Associates Inc. 
(2017)"},{"key":"9_CR38","unstructured":"Zhang, C., Song, W., Cao, Z., Zhang, J., Tan, P.S., Xu, C.: Learning to dispatch for job shop scheduling via deep reinforcement learning. In: Advances in Neural Information Processing Systems, vol.\u00a033, pp. 1621\u20131632. Curran Associates Inc. (2020)"}],"container-title":["Lecture Notes in Computer Science","Integration of Constraint Programming, Artificial Intelligence, and Operations Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-95976-9_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T07:53:48Z","timestamp":1751097228000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-95976-9_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031959752","9783031959769"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-95976-9_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"29 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CPAIOR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on the Integration of Constraint Programming, Artificial Intelligence, and Operations Research","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Melbourne, VIC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference 
Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 November 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 November 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cpaior2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/sites.google.com\/view\/cpaior2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}