{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T18:41:23Z","timestamp":1772822483395,"version":"3.50.1"},"reference-count":100,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2023,5,31]],"date-time":"2023-05-31T00:00:00Z","timestamp":1685491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,5,31]],"date-time":"2023-05-31T00:00:00Z","timestamp":1685491200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"NSF","award":["CBET-1554018"],"award-info":[{"award-number":["CBET-1554018"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Optim Lett"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s11590-023-02009-5","type":"journal-article","created":{"date-parts":[[2023,5,31]],"date-time":"2023-05-31T10:02:16Z","timestamp":1685527336000},"page":"1993-2020","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["A deep reinforcement learning framework for solving two-stage stochastic programs"],"prefix":"10.1007","volume":"18","author":[{"given":"Dogacan","family":"Yilmaz","sequence":"first","affiliation":[]},{"given":"\u0130. Esra","family":"B\u00fcy\u00fcktahtak\u0131n","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,5,31]]},"reference":[{"key":"2009_CR1","doi-asserted-by":"crossref","first-page":"104941","DOI":"10.1016\/j.cor.2020.104941","volume":"119","author":"B Abbasi","year":"2020","unstructured":"Abbasi, B., Babaei, T., Hosseinifard, Z., Smith-Miles, K., Dehghani, M.: Predicting solutions of large-scale optimization problems via machine learning: a case study in blood supply chain management. Comput. Oper. Res. 119, 104941 (2020)","journal-title":"Comput. Oper. Res."},{"key":"2009_CR2","unstructured":"Afshar, R.R., Zhang, Y., Firat, M., Kaymak, U.: A state aggregation approach for solving knapsack problem with deep reinforcement learning. In: Asian Conference on Machine Learning. Proceedings of Machine Learning Research, Cambridge, MA, pp. 81\u201396 (2020)"},{"issue":"2","key":"2009_CR3","doi-asserted-by":"crossref","first-page":"355","DOI":"10.1007\/s10107-003-0475-6","volume":"100","author":"S Ahmed","year":"2004","unstructured":"Ahmed, S., Tawarmalani, M., Sahinidis, N.V.: A finite branch-and-bound algorithm for two-stage stochastic integer programs. Math. Program. 100(2), 355\u2013377 (2004)","journal-title":"Math. Program."},{"issue":"1","key":"2009_CR4","doi-asserted-by":"crossref","first-page":"17","DOI":"10.1007\/s10479-006-0150-4","volume":"150","author":"Y Ak\u00e7ay","year":"2007","unstructured":"Ak\u00e7ay, Y., Li, H., Susan, H.X.: Greedy algorithm for the general multidimensional knapsack problem. Ann. Oper. Res. 150(1), 17\u201329 (2007)","journal-title":"Ann. Oper. Res."},{"key":"2009_CR5","doi-asserted-by":"publisher","DOI":"10.12785\/IJCDS\/040207","author":"M Al-Emran","year":"2015","unstructured":"Al-Emran, M.: Hierarchical reinforcement learning: a survey. Int. J. Comput. Digit. Syst. (2015). https:\/\/doi.org\/10.12785\/IJCDS\/040207","journal-title":"Int. J. Comput. Digit. Syst."},{"issue":"5","key":"2009_CR6","doi-asserted-by":"crossref","first-page":"1166","DOI":"10.3390\/en11051166","volume":"11","author":"M Amoasi Acquah","year":"2018","unstructured":"Amoasi Acquah, M., Kodaira, D., Han, S.: Real-time demand side management algorithm using stochastic optimization. Energies 11(5), 1166 (2018)","journal-title":"Energies"},{"issue":"3","key":"2009_CR7","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1287\/ijoc.2016.0695","volume":"28","author":"G Angulo","year":"2016","unstructured":"Angulo, G., Ahmed, S., Dey, S.S.: Improving the integer l-shaped method. INFORMS J. Comput. 28(3), 483\u2013499 (2016)","journal-title":"INFORMS J. Comput."},{"key":"2009_CR8","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate (2014). arXiv preprint arXiv:1409.0473"},{"key":"2009_CR9","unstructured":"Balaji, B., Bell-Masterson, J., Bilgin, E., Damianou, A., Garcia, P.M., Jain, A., Luo, R., Maggiar, A., Narayanaswamy, B., Orl, C.Y.: Reinforcement learning benchmarks for online stochastic optimization problems (2019). arXiv preprint arXiv:1911.10641"},{"issue":"1","key":"2009_CR10","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1057\/palgrave.jors.2601652","volume":"55","author":"G Barbaroso\u01e7lu","year":"2004","unstructured":"Barbaroso\u01e7lu, G., Arda, Y.: A two-stage stochastic programming framework for transportation planning in disaster response. J. Oper. Res. Soc. 55(1), 43\u201353 (2004)","journal-title":"J. Oper. Res. Soc."},{"key":"2009_CR11","unstructured":"Bello, I., Pham, H., Le, Q.V., Norouzi, M., Bengio, S.: Neural combinatorial optimization with reinforcement learning (2016). arXiv preprint arXiv:1611.09940"},{"key":"2009_CR12","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Frejinger, E., Lodi, A., Patel, R., Sankaranarayanan, S.: A learning-based algorithm to quickly compute good primal solutions for stochastic integer programs. In: International Conference on Integration of Constraint Programming, Artificial Intelligence, and Operations Research, pp. 99\u2013111. Springer, Cham (2020)","DOI":"10.1007\/978-3-030-58942-4_7"},{"issue":"4","key":"2009_CR13","doi-asserted-by":"crossref","first-page":"2229","DOI":"10.1287\/ijoc.2022.1181","volume":"34","author":"D Bertsimas","year":"2022","unstructured":"Bertsimas, D., Stellato, B.: Online mixed-integer optimization in milliseconds. INFORMS J. Comput. 34(4), 2229\u20132248 (2022)","journal-title":"INFORMS J. Comput."},{"key":"2009_CR14","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4614-0237-4","volume-title":"Introduction to Stochastic Programming","author":"JR Birge","year":"2011","unstructured":"Birge, J.R., Louveaux, F.V.: Introduction to Stochastic Programming. Springer, New York (2011)"},{"key":"2009_CR15","unstructured":"Bogyrbayeva, A., Meraliyev, M., Mustakhov, T., Dauletbayev, B.: Learning to solve vehicle routing problems: a survey (2022). arXiv preprint arXiv:2205.02453"},{"key":"2009_CR16","doi-asserted-by":"crossref","unstructured":"Bushaj, S., B\u00fcy\u00fcktahtak\u0131n, \u0130.E.: A K-means supported reinforcement learning algorithm to solve multi-dimensional knapsack problem. (2023, Under review)","DOI":"10.1007\/s10898-024-01364-6"},{"issue":"3","key":"2009_CR17","doi-asserted-by":"crossref","first-page":"1094","DOI":"10.1016\/j.ejor.2021.08.035","volume":"299","author":"S Bushaj","year":"2022","unstructured":"Bushaj, S., B\u00fcy\u00fcktahtak\u0131n, \u0130E., Haight, R.G.: Risk-averse multi-stage stochastic optimization for surveillance and operations planning of a forest insect infestation. Eur. J. Oper. Res. 299(3), 1094\u20131110 (2022)","journal-title":"Eur. J. Oper. Res."},{"key":"2009_CR18","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-022-04926-7","author":"S Bushaj","year":"2022","unstructured":"Bushaj, S., Yin, X., Beqiri, A., Andrews, D., B\u00fcy\u00fcktahtak\u0131n, \u0130E.: A simulation-deep reinforcement learning (SiRL) approach for epidemic control optimization. Ann. Oper. Res. (2022). https:\/\/doi.org\/10.1007\/s10479-022-04926-7","journal-title":"Ann. Oper. Res."},{"issue":"2","key":"2009_CR19","doi-asserted-by":"crossref","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Bu\u015foniu","year":"2008","unstructured":"Bu\u015foniu, L., Babu\u0161ka, R., De Schutter, B.: A comprehensive survey of multiagent reinforcement learning. IEEE Trans. Syst. Man Cybern. Part C (Appl. Rev.) 38(2), 156\u2013172 (2008)","journal-title":"IEEE Trans. Syst. Man Cybern. Part C (Appl. Rev.)"},{"key":"2009_CR20","first-page":"183","volume":"1","author":"L Bu\u015foniu","year":"2010","unstructured":"Bu\u015foniu, L., Babu\u0161ka, R., De Schutter, B.: Multi-agent reinforcement learning: an overview. Innov. Multi-agent Syst. Appl.-1 1, 183 (2010)","journal-title":"Innov. Multi-agent Syst. Appl.-1"},{"issue":"1","key":"2009_CR21","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10479-021-04388-3","volume":"309","author":"\u0130E B\u00fcy\u00fcktahtak\u0131n","year":"2022","unstructured":"B\u00fcy\u00fcktahtak\u0131n, \u0130E.: Stage-t scenario dominance for risk-averse multi-stage stochastic mixed-integer programs. Ann. Oper. Res. 309(1), 1\u201335 (2022)","journal-title":"Ann. Oper. Res."},{"key":"2009_CR22","doi-asserted-by":"crossref","DOI":"10.1016\/j.cor.2023.106149","volume":"153","author":"\u0130E B\u00fcy\u00fcktahtak\u0131n","year":"2023","unstructured":"B\u00fcy\u00fcktahtak\u0131n, \u0130E.: Scenario-dominance to multi-stage stochastic lot-sizing and knapsack problems. Comput. Oper. Res. 153, 106149 (2023)","journal-title":"Comput. Oper. Res."},{"key":"2009_CR23","doi-asserted-by":"crossref","first-page":"2227","DOI":"10.1016\/j.procs.2015.05.501","volume":"51","author":"PJS Cardoso","year":"2015","unstructured":"Cardoso, P.J.S., Sch\u00fctz, G., Mazayev, A., Ey, E., Corr\u00eaa, T.: A solution for a real-time stochastic capacitated vehicle routing problem with time windows. Procedia Comput. Sci. 51, 2227\u20132236 (2015)","journal-title":"Procedia Comput. Sci."},{"issue":"1\u20132","key":"2009_CR24","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1016\/S0167-6377(98)00050-9","volume":"24","author":"CC Car\u00f8e","year":"1999","unstructured":"Car\u00f8e, C.C., Schultz, R.: Dual decomposition in stochastic integer programming. Oper. Res. Lett. 24(1\u20132), 37\u201345 (1999)","journal-title":"Oper. Res. Lett."},{"key":"2009_CR25","first-page":"6281","volume":"32","author":"X Chen","year":"2019","unstructured":"Chen, X., Tian, Y.: Learning to perform local rewriting for combinatorial optimization. Adv. Neural. Inf. Process. Syst. 32, 6281\u20136292 (2019)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"1","key":"2009_CR26","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1016\/S0377-2217(96)90062-4","volume":"89","author":"SY Chius","year":"1996","unstructured":"Chius, S.Y., Lu, L., Cox, L.A., Jr.: Optimal access control for broadband services: stochastic knapsack with advance information. Eur. J. Oper. Res. 89(1), 127\u2013134 (1996)","journal-title":"Eur. J. Oper. Res."},{"key":"2009_CR27","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1016\/j.cie.2017.02.017","volume":"107","author":"HI Cobuloglu","year":"2017","unstructured":"Cobuloglu, H.I., B\u00fcy\u00fcktahtak\u0131n, \u0130E.: A two-stage stochastic mixed-integer programming approach to the competition of biofuel and food production. Comput. Ind. Eng. 107, 251\u2013263 (2017)","journal-title":"Comput. Ind. Eng."},{"key":"2009_CR28","unstructured":"Cohn, A.M., Barnhart, C.: The stochastic knapsack problem with random weights: a heuristic approach to robust transportation planning. In: Proceedings of the Triennial Symposium on Transportation Analysis, vol. 3, pp. 17\u201323 (1998)"},{"key":"2009_CR29","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1016\/j.apenergy.2018.09.195","volume":"232","author":"JL Crespo-Vazquez","year":"2018","unstructured":"Crespo-Vazquez, J.L., Carrillo, C., Diaz-Dorado, E., Martinez-Lorenzo, J.A., Noor-E-Alam, Md.: A machine learning based stochastic optimization framework for a wind and storage power plant participating in energy pool market. Appl. Energy 232, 341\u2013357 (2018)","journal-title":"Appl. Energy"},{"key":"2009_CR30","doi-asserted-by":"crossref","unstructured":"Costa, P.R.O., Rhuggenaath, J., Zhang, Y., Akcay, A.: Learning 2-opt heuristics for the traveling salesman problem via deep reinforcement learning. In: Asian Conference on Machine Learning, pp. 465\u2013480. PMLR (2020)","DOI":"10.1007\/s42979-021-00779-2"},{"issue":"3\u20134","key":"2009_CR31","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1287\/mnsc.1.3-4.197","volume":"1","author":"GB Dantzig","year":"1955","unstructured":"Dantzig, G.B.: Linear programming under uncertainty. Manag. Sci. 1(3\u20134), 197\u2013206 (1955)","journal-title":"Manag. Sci."},{"key":"2009_CR32","first-page":"609","volume":"33","author":"A Delarue","year":"2020","unstructured":"Delarue, A., Anderson, R., Tjandraatmadja, C.: Reinforcement learning with combinatorial actions: an application to vehicle routing. Adv. Neural. Inf. Process. Syst. 33, 609\u2013620 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2009_CR33","doi-asserted-by":"crossref","unstructured":"Deudon, M., Cournut, P., Lacoste, A., Adulyasak, Y., Rousseau, L.-M.: Learning heuristics for the TSP by policy gradient. In: Integration of Constraint Programming, Artificial Intelligence, and Operations Research, pp. 170\u2013181. Springer, Cham (2018). ISBN 978-3-319-93031-2","DOI":"10.1007\/978-3-319-93031-2_12"},{"key":"2009_CR34","unstructured":"Ding, L., Ahmed, S., Shapiro, A.: A python package for multi-stage stochastic programming. In:Optimization Online (2019)"},{"key":"2009_CR35","unstructured":"Duan, L., Hu, H., Qian, Y., Gong, Y., Zhang, X., Wei, J., Xu, Y.: A multi-task selected learning approach for solving 3D flexible bin packing problem. In: Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems, pp. 1386\u20131394 (2019)"},{"issue":"4","key":"2009_CR36","doi-asserted-by":"crossref","first-page":"313","DOI":"10.1007\/s10287-006-0026-8","volume":"4","author":"CI F\u00e1bi\u00e1n","year":"2007","unstructured":"F\u00e1bi\u00e1n, C.I., Sz\u0151ke, Z.: Solving two-stage stochastic programming problems with level decomposition. CMS 4(4), 313\u2013353 (2007)","journal-title":"CMS"},{"key":"2009_CR37","doi-asserted-by":"crossref","unstructured":"Feng, Y., Niazadeh, R., Saberi, A.: Two-stage stochastic matching with application to ride hailing. In: Proceedings of the 2021 ACM-SIAM Symposium on Discrete Algorithms, pp. 2862\u20132877. Society for Industrial and Applied Mathematics, Philadelphia (2021)","DOI":"10.1137\/1.9781611976465.170"},{"key":"2009_CR38","unstructured":"Frejinger, E., Larsen, E.: A language processing algorithm for predicting tactical solutions to an operational planning problem under uncertainty (2019). arXiv preprint arXiv:1910.08216"},{"issue":"1","key":"2009_CR39","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1007\/s10107-012-0615-y","volume":"144","author":"D Gade","year":"2014","unstructured":"Gade, D., K\u00fc\u00e7\u00fckyavuz, S., Sen, S.: Decomposition algorithms with parametric Gomory cuts for two-stage stochastic integer programs. Math. Program. 144(1), 39\u201364 (2014)","journal-title":"Math. Program."},{"issue":"1","key":"2009_CR40","doi-asserted-by":"crossref","first-page":"47","DOI":"10.1007\/s10107-016-1000-z","volume":"157","author":"D Gade","year":"2016","unstructured":"Gade, D., Hackebeil, G., Ryan, S.M., Watson, J.-P., Wets, R.J.-B., Woodruff, D.L.: Obtaining lower bounds from the progressive hedging algorithm for stochastic mixed-integer programs. Math. Program. 157(1), 47\u201367 (2016)","journal-title":"Math. Program."},{"issue":"3","key":"2009_CR41","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1007\/s10898-010-9566-0","volume":"49","author":"AA Gaivoronski","year":"2011","unstructured":"Gaivoronski, A.A., Lisser, A., Lopez, R., Xu, H.: Knapsack problem with probability constraints. J. Glob. Optim. 49(3), 397\u2013413 (2011)","journal-title":"J. Glob. Optim."},{"issue":"11","key":"2009_CR42","doi-asserted-by":"crossref","first-page":"3739","DOI":"10.1002\/aic.15032","volume":"61","author":"J Gao","year":"2015","unstructured":"Gao, J., You, F.: Deciphering and handling uncertainty in shale gas supply chain design and optimization: novel modeling framework and computationally efficient solution algorithm. AIChE J. 61(11), 3739\u20133755 (2015)","journal-title":"AIChE J."},{"issue":"2","key":"2009_CR43","doi-asserted-by":"crossref","first-page":"557","DOI":"10.1007\/s10479-018-2880-5","volume":"284","author":"E Grass","year":"2020","unstructured":"Grass, E., Fischer, K., Rams, A.: An accelerated l-shaped method for solving two-stage stochastic programs in disaster management. Ann. Oper. Res. 284(2), 557\u2013582 (2020)","journal-title":"Ann. Oper. Res."},{"issue":"5\u20136","key":"2009_CR44","doi-asserted-by":"crossref","first-page":"602","DOI":"10.1016\/j.neunet.2005.06.042","volume":"18","author":"A Graves","year":"2005","unstructured":"Graves, A., Schmidhuber, J.: Framewise phoneme classification with bidirectional LSTM and other neural network architectures. Neural Netw. 18(5\u20136), 602\u2013610 (2005)","journal-title":"Neural Netw."},{"key":"2009_CR45","doi-asserted-by":"crossref","unstructured":"Gu, S., Hao, T.: A pointer network based deep learning algorithm for 0\u20131 knapsack problem. In: 2018 Tenth International Conference on Advanced Computational Intelligence (ICACI), pp. 473\u2013477. IEEE (2018)","DOI":"10.1109\/ICACI.2018.8377505"},{"issue":"2","key":"2009_CR46","doi-asserted-by":"crossref","first-page":"298","DOI":"10.3390\/math8020298","volume":"8","author":"S Gu","year":"2020","unstructured":"Gu, S., Yang, Y.: A deep learning algorithm for the max-cut problem based on pointer network structure with supervised learning and reinforcement learning strategies. Mathematics 8(2), 298 (2020)","journal-title":"Mathematics"},{"key":"2009_CR47","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.neucom.2019.06.111","volume":"390","author":"S Gu","year":"2020","unstructured":"Gu, S., Hao, T., Yao, H.: A pointer network based deep learning algorithm for unconstrained binary quadratic programming problem. Neurocomputing 390, 1\u201311 (2020). https:\/\/doi.org\/10.1016\/j.neucom.2019.06.111. (ISSN 0925-2312)","journal-title":"Neurocomputing"},{"issue":"12","key":"2009_CR48","doi-asserted-by":"crossref","first-page":"1573","DOI":"10.1016\/j.envsoft.2010.04.018","volume":"25","author":"P Guo","year":"2010","unstructured":"Guo, P., Huang, G.H., Wang, X.L., Zhu, H.: A two-stage programming approach for water resources management under randomness and fuzziness. Environ. Model. Softw. 25(12), 1573\u20131581 (2010)","journal-title":"Environ. Model. Softw."},{"key":"2009_CR49","unstructured":"He, Y., Wu, G., Chen, Y., Pedrycz, W.: A two-stage framework and reinforcement learning-based optimization algorithms for complex scheduling problems (2021). arXiv preprint arXiv:2103.05847"},{"issue":"1","key":"2009_CR50","doi-asserted-by":"crossref","first-page":"481","DOI":"10.1109\/TSTE.2018.2805164","volume":"10","author":"MN Hjelmeland","year":"2018","unstructured":"Hjelmeland, M.N., Zou, J., Helseth, A., Ahmed, S.: Nonconvex medium-term hydropower scheduling by stochastic dual dynamic integer programming. IEEE Trans. Sustain. Energy 10(1), 481\u2013490 (2018)","journal-title":"IEEE Trans. Sustain. Energy"},{"issue":"8","key":"2009_CR51","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"2009_CR52","unstructured":"Hu, H., Zhang, X., Yan, X., Wang, L., Xu, Y.: Solving a new 3D bin packing problem with deep reinforcement learning method (2017). arXiv preprint arXiv:1708.05930"},{"issue":"3","key":"2009_CR53","doi-asserted-by":"crossref","first-page":"895","DOI":"10.1287\/opre.2020.2017","volume":"69","author":"D Hwang","year":"2021","unstructured":"Hwang, D., Jaillet, P., Manshadi, V.: Online resource allocation under partially predictable demand. Oper. Res. 69(3), 895\u2013915 (2021)","journal-title":"Oper. Res."},{"key":"2009_CR54","first-page":"6351","volume":"30","author":"E Khalil","year":"2017","unstructured":"Khalil, E., Dai, H., Zhang, Y., Dilkina, B., Song, L.: Learning combinatorial optimization algorithms over graphs. Adv. Neural. Inf. Process. Syst. 30, 6351\u20136361 (2017)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"6","key":"2009_CR55","doi-asserted-by":"crossref","first-page":"1431","DOI":"10.1287\/opre.2015.1421","volume":"63","author":"K Kim","year":"2015","unstructured":"Kim, K., Mehrotra, S.: A two-stage stochastic integer programming approach to integrated staffing and scheduling with application to nurse management. Oper. Res. 63(6), 1431\u20131451 (2015)","journal-title":"Oper. Res."},{"key":"2009_CR56","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization (2014). arXiv preprint arXiv:1412.6980"},{"key":"2009_CR57","unstructured":"Kool, W., van Hoof, H., Welling, M.: Attention, learn to solve routing problems! In: International Conference on Learning Representations (2018). https:\/\/openreview.net\/forum?id=ByxBFsRqYm"},{"key":"2009_CR58","first-page":"21188","volume":"33","author":"Y-D Kwon","year":"2020","unstructured":"Kwon, Y.-D., Choo, J., Kim, B., Yoon, I., Gwon, Y., Min, S.: POMO: policy optimization with multiple optima for reinforcement learning. Adv. Neural. Inf. Process. Syst. 33, 21188\u201321198 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"3","key":"2009_CR59","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1016\/0167-6377(93)90002-X","volume":"13","author":"G Laporte","year":"1993","unstructured":"Laporte, G., Louveaux, F.V.: The integer L-shaped method for stochastic integer programs with complete recourse. Oper. Res. Lett. 13(3), 133\u2013142 (1993)","journal-title":"Oper. Res. Lett."},{"issue":"4","key":"2009_CR60","doi-asserted-by":"crossref","first-page":"1243","DOI":"10.1007\/s11081-019-09471-0","volume":"21","author":"CL Lara","year":"2020","unstructured":"Lara, C.L., Siirola, J.D., Grossmann, I.E.: Electric power infrastructure planning under uncertainty: stochastic dual dynamic integer programming (SDDiP) and parallelization scheme. Optim. Eng. 21(4), 1243\u20131281 (2020)","journal-title":"Optim. Eng."},{"issue":"1","key":"2009_CR61","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1287\/ijoc.2021.1091","volume":"34","author":"E Larsen","year":"2022","unstructured":"Larsen, E., Lachapelle, S., Bengio, Y., Frejinger, E., Lacoste-Julien, S., Lodi, A.: Predicting tactical solutions to operational planning problems under imperfect information. INFORMS J. Comput. 34(1), 227\u2013242 (2022)","journal-title":"INFORMS J. Comput."},{"key":"2009_CR62","doi-asserted-by":"crossref","unstructured":"Li, J., Wang, Y., Lyu, M.R., King, I.: Code completion with neural attention and pointer networks (2017). arXiv preprint arXiv:1711.09573","DOI":"10.24963\/ijcai.2018\/578"},{"issue":"6","key":"2009_CR63","doi-asserted-by":"crossref","first-page":"3103","DOI":"10.1109\/TCYB.2020.2977661","volume":"51","author":"K Li","year":"2020","unstructured":"Li, K., Zhang, T., Wang, R.: Deep reinforcement learning for multiobjective optimization. IEEE Trans. Cybern. 51(6), 3103\u20133114 (2020)","journal-title":"IEEE Trans. Cybern."},{"issue":"8","key":"2009_CR64","doi-asserted-by":"crossref","first-page":"11528","DOI":"10.1109\/TITS.2021.3105232","volume":"23","author":"B Lin","year":"2021","unstructured":"Lin, B., Ghaddar, B., Nathwani, J.: Deep reinforcement learning for the electric vehicle routing problem with time windows. IEEE Trans. Intell. Transp. Syst. 23(8), 11528\u201311538 (2021)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"2","key":"2009_CR65","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1023\/A:1021858008222","volume":"24","author":"J Linderoth","year":"2003","unstructured":"Linderoth, J., Wright, S.: Decomposition algorithms for stochastic programming on a computational grid. Comput. Optim. Appl. 24(2), 207\u2013250 (2003)","journal-title":"Comput. Optim. Appl."},{"key":"2009_CR66","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1016\/j.endm.2010.05.013","volume":"36","author":"A Lisser","year":"2010","unstructured":"Lisser, A., Lopez, R.: Stochastic quadratic knapsack with recourse. Electron. Notes Discrete Math. 36, 97\u2013104 (2010)","journal-title":"Electron. Notes Discrete Math."},{"issue":"3","key":"2009_CR67","doi-asserted-by":"crossref","first-page":"252","DOI":"10.1016\/j.orl.2013.02.003","volume":"41","author":"M Lubin","year":"2013","unstructured":"Lubin, M., Martin, K., Petra, C.G., Sand\u0131k\u00e7\u0131, B.: On parallelizing dual decomposition in stochastic integer programming. Oper. Res. Lett. 41(3), 252\u2013258 (2013)","journal-title":"Oper. Res. Lett."},{"key":"2009_CR68","doi-asserted-by":"crossref","unstructured":"Luong, M.-T., Pham, H., Manning, C.D.: Effective approaches to attention-based neural machine translation (2015). arXiv preprint arXiv:1508.04025","DOI":"10.18653\/v1\/D15-1166"},{"key":"2009_CR69","unstructured":"Ma, Q., Ge, S., He, D., Thaker, D., Drori, I.: Combinatorial optimization by graph pointer networks and hierarchical reinforcement learning (2019). arXiv preprint arXiv:1911.04936"},{"issue":"3","key":"2009_CR70","doi-asserted-by":"crossref","first-page":"931","DOI":"10.1287\/ijoc.2020.0972","volume":"33","author":"Y Merzifonluoglu","year":"2021","unstructured":"Merzifonluoglu, Y., Geunes, J.: The risk-averse static stochastic knapsack problem. INFORMS J. Comput. 33(3), 931\u2013948 (2021)","journal-title":"INFORMS J. Comput."},{"issue":"3","key":"2009_CR71","doi-asserted-by":"crossref","first-page":"329","DOI":"10.1016\/0377-2217(89)90425-6","volume":"40","author":"H Morita","year":"1989","unstructured":"Morita, H., Ishii, H., Nishida, T.: Stochastic linear knapsack programming problem and its application to a portfolio selection problem. Eur. J. Oper. Res. 40(3), 329\u2013336 (1989)","journal-title":"Eur. J. Oper. Res."},{"key":"2009_CR72","doi-asserted-by":"crossref","unstructured":"Mottini, A., Acuna-Agost, R.: Deep choice model using pointer networks for airline itinerary prediction. In: Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 1575\u20131583. Association for Computing Machinery, New York, NY (2017)","DOI":"10.1145\/3097983.3098005"},{"key":"2009_CR73","unstructured":"Nachum, O., Gu, S.S., Lee, H., Levine, S.: Data-efficient hierarchical reinforcement learning. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"issue":"8","key":"2009_CR74","doi-asserted-by":"crossref","first-page":"1972","DOI":"10.1016\/j.cor.2013.02.006","volume":"40","author":"J Naoum-Sawaya","year":"2013","unstructured":"Naoum-Sawaya, J., Elhedhli, S.: A stochastic optimization model for real-time ambulance redeployment. Comput. Oper. Res. 40(8), 1972\u20131978 (2013)","journal-title":"Comput. Oper. Res."},{"key":"2009_CR75","unstructured":"Nazari, M., Oroojlooy, A., Tak\u00e1\u010d, M., Snyder, L.V.: Reinforcement learning for solving the vehicle routing problem. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems, pp. 9861\u20139871. Curran Associates Inc., Red Hook (2018)"},{"issue":"9","key":"2009_CR76","doi-asserted-by":"crossref","first-page":"3826","DOI":"10.1109\/TCYB.2020.2977374","volume":"50","author":"TT Nguyen","year":"2020","unstructured":"Nguyen, T.T., Nguyen, N.D., Nahavandi, S.: Deep reinforcement learning for multiagent systems: a review of challenges, solutions, and applications. IEEE Trans. Cybern. 50(9), 3826\u20133839 (2020)","journal-title":"IEEE Trans. Cybern."},{"issue":"5","key":"2009_CR77","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3453160","volume":"54","author":"S Pateria","year":"2021","unstructured":"Pateria, S., Subagdja, B., Tan, A., Quek, C.: Hierarchical reinforcement learning: a comprehensive survey. ACM Comput. Surv. (CSUR) 54(5), 1\u201335 (2021)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"2009_CR78","volume-title":"Stochastic Programming","author":"A Pr\u00e9kopa","year":"2013","unstructured":"Pr\u00e9kopa, A.: Stochastic Programming. Springer, Dordrecht (2013)"},{"issue":"1","key":"2009_CR79","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1287\/moor.16.1.119","volume":"16","author":"RT Rockafellar","year":"1991","unstructured":"Rockafellar, R.T., Wets, R.J.-B.: Scenarios and policy aggregation in optimization under uncertainty. Math. Oper. Res. 16(1), 119\u2013147 (1991)","journal-title":"Math. Oper. Res."},{"issue":"3","key":"2009_CR80","doi-asserted-by":"crossref","first-page":"309","DOI":"10.1007\/BF01580883","volume":"35","author":"A Ruszczy\u0144ski","year":"1986","unstructured":"Ruszczy\u0144ski, A.: A regularized decomposition method for minimizing a sum of polyhedral functions. Math. Program. 35(3), 309\u2013333 (1986)","journal-title":"Math. Program."},{"key":"2009_CR81","doi-asserted-by":"crossref","unstructured":"See, A., Liu, P.J., Manning, C.D.: Get to the point: summarization with pointer-generator networks (2017). arXiv preprint arXiv:1704.04368","DOI":"10.18653\/v1\/P17-1099"},{"key":"2009_CR82","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic policy gradient algorithms. In: International Conference on Machine Learning. Proceedings of Machine Learning Research, Cambridge, MA, pp. 387\u2013395 (2014)"},{"issue":"1","key":"2009_CR83","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15(1), 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"2009_CR84","unstructured":"Tang, Y., Agrawal, S., Faenza, Y.: Reinforcement learning for integer programming: learning to cut. In: International Conference on Machine Learning. Proceedings of Machine Learning Research, Cambridge, MA, pp. 9367\u20139376 (2020)"},{"issue":"2","key":"2009_CR85","doi-asserted-by":"crossref","first-page":"475","DOI":"10.1111\/poms.13277","volume":"30","author":"S Thevenin","year":"2021","unstructured":"Thevenin, S., Adulyasak, Y., Cordeau, J.-F.: Material requirements planning under demand uncertainty using stochastic optimization. Prod. Oper. Manag. 30(2), 475\u2013493 (2021)","journal-title":"Prod. Oper. Manag."},{"key":"2009_CR86","doi-asserted-by":"crossref","first-page":"3151","DOI":"10.1287\/ijoc.2022.1215","volume":"34","author":"S Thevenin","year":"2022","unstructured":"Thevenin, S., Adulyasak, Y., Cordeau, J.-F.: Stochastic dual dynamic programming for multiechelon lot sizing with component substitution. INFORMS J. Comput. 34, 3151\u20133169 (2022)","journal-title":"INFORMS J. Comput."},{"issue":"4","key":"2009_CR87","doi-asserted-by":"crossref","first-page":"638","DOI":"10.1137\/0117061","volume":"17","author":"RM Van Slyke","year":"1969","unstructured":"Van Slyke, R.M., Wets, R.: L-shaped linear programs with applications to optimal control and stochastic programming. SIAM J. Appl. Math. 17(4), 638\u2013663 (1969)","journal-title":"SIAM J. Appl. Math."},{"key":"2009_CR88","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Advances in Information Processing Systems, vol. 30 (2017)"},{"key":"2009_CR89","first-page":"2692","volume":"28","author":"O Vinyals","year":"2015","unstructured":"Vinyals, O., Fortunato, M., Jaitly, N.: Pointer networks. Adv. Neural. Inf. Process. Syst. 28, 2692\u20132700 (2015)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2009_CR90","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1016\/j.sepro.2012.04.020","volume":"5","author":"J Wang","year":"2012","unstructured":"Wang, J., Yang, H., Zhu, J.: A two-stage stochastic programming model for emergency resources storage region division. Syst. Eng. Procedia 5, 125\u2013130 (2012)","journal-title":"Syst. Eng. Procedia"},{"issue":"6","key":"2009_CR91","doi-asserted-by":"crossref","first-page":"80","DOI":"10.2307\/3001968","volume":"1","author":"F Wilcoxon","year":"1945","unstructured":"Wilcoxon, F.: Individual comparisons by ranking methods. Biom. Bull. 1(6), 80\u201383 (1945)","journal-title":"Biom. Bull."},{"issue":"3","key":"2009_CR92","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8(3), 229\u2013256 (1992)","journal-title":"Mach. Learn."},{"key":"2009_CR93","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1016\/j.trb.2017.05.002","volume":"102","author":"W Fei","year":"2017","unstructured":"Fei, W., Sioshansi, R.: A two-stage stochastic optimization model for scheduling electric vehicle charging loads to relieve distribution-system constraints. Transp. Res. Part B Methodol. 102, 55\u201382 (2017)","journal-title":"Transp. Res. Part B Methodol."},{"key":"2009_CR94","unstructured":"Wu, Y., Song, W., Cao, Z., Zhang, J.: Learning scenario representation for solving two-stage stochastic integer programs. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=06Wy2BtxXrz"},{"issue":"1","key":"2009_CR95","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1287\/trsc.1050.0138","volume":"40","author":"JW Yen","year":"2006","unstructured":"Yen, J.W., Birge, J.R.: A stochastic programming approach to the airline crew scheduling problem. Transp. Sci. 40(1), 3\u201314 (2006)","journal-title":"Transp. Sci."},{"key":"2009_CR96","unstructured":"Yilmaz, D., B\u00fcy\u00fcktahtak\u0131n, \u0130.E.: An expandable learning-optimization framework for sequentially dependent decision-making. Submitted to Eur. J. Oper. Res. (2022)"},{"key":"2009_CR97","doi-asserted-by":"crossref","unstructured":"Yilmaz, D., B\u00fcy\u00fcktahtak\u0131n, \u0130.E.: Learning optimal solutions via an LSTM-optimization framework. Accepted for Publication in Oper. Res. Forum (2023)","DOI":"10.1007\/s43069-023-00224-5"},{"issue":"20","key":"2009_CR98","doi-asserted-by":"crossref","first-page":"7802","DOI":"10.1021\/ie800257x","volume":"47","author":"F You","year":"2008","unstructured":"You, F., Grossmann, I.E.: Mixed-integer nonlinear programming models and algorithms for large-scale supply chain design with stochastic inventory management. Ind. Eng. Chem. Res. 47(20), 7802\u20137817 (2008)","journal-title":"Ind. Eng. Chem. Res."},{"issue":"1","key":"2009_CR99","doi-asserted-by":"crossref","first-page":"461","DOI":"10.1007\/s10107-018-1249-5","volume":"175","author":"J Zou","year":"2019","unstructured":"Zou, J., Ahmed, S., Sun, X.A.: Stochastic dual dynamic integer programming. Math. Program. 175(1), 461\u2013502 (2019)","journal-title":"Math. Program."},{"key":"2009_CR100","doi-asserted-by":"crossref","unstructured":"Yilmaz, D., B\u00fcy\u00fcktahtak\u0131n, \u0130.E.: A non-anticipative learning-optimization framework for solving multi-stage stochastic programs. Under Review for Publication (2023)","DOI":"10.1007\/s11590-023-02009-5"}],"container-title":["Optimization Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11590-023-02009-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11590-023-02009-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11590-023-02009-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,15]],"date-time":"2024-10-15T10:24:19Z","timestamp":1728987859000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11590-023-02009-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,31]]},"references-count":100,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["2009"],"URL":"https:\/\/doi.org\/10.1007\/s11590-023-02009-5","relation":{},"ISSN":["1862-4472","1862-4480"],"issn-type":[{"value":"1862-4472","type":"print"},{"value":"1862-4480","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,5,31]]},"assertion":[{"value":"17 November 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 April 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 May 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}