{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T09:10:12Z","timestamp":1773825012191,"version":"3.50.1"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031746390","type":"print"},{"value":"9783031746406","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-74640-6_37","type":"book-chapter","created":{"date-parts":[[2024,12,31]],"date-time":"2024-12-31T23:00:48Z","timestamp":1735686048000},"page":"454-469","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Comparing Deep Reinforcement Learning Algorithms in\u00a0Two-Echelon Supply Chains"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5366-8499","authenticated-orcid":false,"given":"Francesco","family":"Stranieri","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1394-0507","authenticated-orcid":false,"given":"Fabio","family":"Stella","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,1]]},"reference":[{"key":"37_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"584","DOI":"10.1007\/978-3-030-59747-4_38","volume-title":"Computational Logistics","author":"JC Alves","year":"2020","unstructured":"Alves, J.C., Mateus, G.R.: Deep reinforcement learning and optimization approach for multi-echelon supply chain with uncertain demands. In: Lalla-Ruiz, E., Mes, M., Vo\u00df, S. (eds.) ICCL 2020. LNCS, vol. 12433, pp. 584\u2013599. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-59747-4_38"},{"issue":"2","key":"37_CR2","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1016\/j.ejor.2021.07.016","volume":"298","author":"RN Boute","year":"2022","unstructured":"Boute, R.N., Gijsbrechts, J., van Jaarsveld, W., Vanvuchelen, N.: Deep reinforcement learning for inventory control: a roadmap. Eur. J. Oper. Res. 298(2), 401\u2013412 (2022). https:\/\/doi.org\/10.1016\/j.ejor.2021.07.016","journal-title":"Eur. J. Oper. Res."},{"issue":"4","key":"37_CR3","doi-asserted-by":"publisher","first-page":"949","DOI":"10.1016\/j.dss.2008.03.007","volume":"45","author":"SK Chaharsooghi","year":"2008","unstructured":"Chaharsooghi, S.K., Heydari, J., Zegordi, S.H.: A reinforcement learning model for supply chain ordering management: an application to the beer game. Decis. Support Syst. 45(4), 949\u2013959 (2008). https:\/\/doi.org\/10.1016\/j.dss.2008.03.007","journal-title":"Decis. 
Support Syst."},{"issue":"3\u20134","key":"37_CR4","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1561\/2200000071","volume":"11","author":"V Fran\u00e7ois-Lavet","year":"2018","unstructured":"Fran\u00e7ois-Lavet, V., Henderson, P., Islam, R., Bellemare, M.G., Pineau, J.: An introduction to deep reinforcement learning. Found. Trends Mach. Learn. 11(3\u20134), 219\u2013354 (2018). https:\/\/doi.org\/10.1561\/2200000071","journal-title":"Found. Trends Mach. Learn."},{"issue":"3","key":"37_CR5","doi-asserted-by":"publisher","first-page":"1349","DOI":"10.1287\/msom.2021.1064","volume":"24","author":"J Gijsbrechts","year":"2022","unstructured":"Gijsbrechts, J., Boute, R.N., Mieghem, J.A.V., Zhang, D.J.: Can deep reinforcement learning improve inventory management? performance on lost sales, dual-sourcing, and multi-echelon problems. Manuf. Serv. Oper. Manag. 24(3), 1349\u20131368 (2022). https:\/\/doi.org\/10.1287\/msom.2021.1064","journal-title":"Manuf. Serv. Oper. Manag."},{"key":"37_CR6","doi-asserted-by":"publisher","unstructured":"Hubbs, C.D., Perez, H.D., Sarwar, O., Sahinidis, N.V., Grossmann, I.E., Wassick, J.M.: Or-gym: a reinforcement learning library for operations research problems (2020). https:\/\/doi.org\/10.48550\/ARXIV.2008.06319","DOI":"10.48550\/ARXIV.2008.06319"},{"issue":"6","key":"37_CR7","doi-asserted-by":"publisher","first-page":"1185","DOI":"10.1162\/neco.1994.6.6.1185","volume":"6","author":"T Jaakkola","year":"1994","unstructured":"Jaakkola, T., Jordan, M.I., Singh, S.P.: On the convergence of stochastic iterative dynamic programming algorithms. Neural Comput. 6(6), 1185\u20131201 (1994). https:\/\/doi.org\/10.1162\/neco.1994.6.6.1185","journal-title":"Neural Comput."},{"key":"37_CR8","unstructured":"Kemmer, L., von Kleist, H., de\u00a0Rochebou\u00ebt, D., Tziortziotis, N., Read, J.: Reinforcement learning for supply chain optimization. In: European Workshop on Reinforcement Learning, vol.\u00a014 (2018)"},{"issue":"3","key":"37_CR9","doi-asserted-by":"publisher","first-page":"955","DOI":"10.1016\/j.ejor.2018.02.047","volume":"269","author":"T de Kok","year":"2018","unstructured":"de Kok, T., Grob, C., Laumanns, M., Minner, S., Rambau, J., Schade, K.: A typology and literature review on stochastic multi-echelon inventory models. Eur. J. Oper. Res. 269(3), 955\u2013983 (2018). https:\/\/doi.org\/10.1016\/j.ejor.2018.02.047","journal-title":"Eur. J. Oper. Res."},{"issue":"7553","key":"37_CR10","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u2013444 (2015). https:\/\/doi.org\/10.1038\/nature14539","journal-title":"Nature"},{"key":"37_CR11","unstructured":"Li, L., et al.: A system for massively parallel hyperparameter tuning. In: Dhillon, I., Papailiopoulos, D., Sze, V. (eds.) Proceedings of Machine Learning and Systems, vol.\u00a02, pp. 230\u2013246 (2020)"},{"key":"37_CR12","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: Balcan, M.F., Weinberger, K.Q. (eds.) Proceedings of the 33rd International Conference on Machine Learning, vol.\u00a048, pp. 1928\u20131937. PMLR, New York (2016)"},{"issue":"7540","key":"37_CR13","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. 
Nature 518(7540), 529\u2013533 (2015). https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"issue":"1","key":"37_CR14","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1287\/msom.2020.0939","volume":"24","author":"A Oroojlooyjadid","year":"2022","unstructured":"Oroojlooyjadid, A., Nazari, M., Snyder, L.V., Tak\u00e1\u010d, M.: A deep q-network for the beer game: deep reinforcement learning for inventory optimization. Manuf. Serv. Oper. Manag. 24(1), 285\u2013304 (2022). https:\/\/doi.org\/10.1287\/msom.2020.0939","journal-title":"Manuf. Serv. Oper. Manag."},{"key":"37_CR15","doi-asserted-by":"publisher","unstructured":"Peng, Z., Zhang, Y., Feng, Y., Zhang, T., Wu, Z., Su, H.: Deep reinforcement learning approach for capacitated supply chain optimization under demand uncertainty. In: 2019 Chinese Automation Congress (CAC). IEEE (2019). https:\/\/doi.org\/10.1109\/cac48633.2019.8997498","DOI":"10.1109\/cac48633.2019.8997498"},{"key":"37_CR16","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: Bach, F., Blei, D. (eds.) Proceedings of the 32nd International Conference on Machine Learning, vol.\u00a037, pp. 1889\u20131897. PMLR, Lille (2015)"},{"key":"37_CR17","doi-asserted-by":"publisher","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms (2017). https:\/\/doi.org\/10.48550\/ARXIV.1707.06347","DOI":"10.48550\/ARXIV.1707.06347"},{"issue":"7676","key":"37_CR18","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354\u2013359 (2017). https:\/\/doi.org\/10.1038\/nature24270","journal-title":"Nature"},{"key":"37_CR19","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT press, Cambridge (2018)"},{"issue":"7782","key":"37_CR20","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., et al.: Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575(7782), 350\u2013354 (2019). https:\/\/doi.org\/10.1038\/s41586-019-1724-z","journal-title":"Nature"},{"issue":"3\u20134","key":"37_CR21","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/bf00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8(3\u20134), 229\u2013256 (1992). https:\/\/doi.org\/10.1007\/bf00992696","journal-title":"Mach. Learn."},{"key":"37_CR22","doi-asserted-by":"publisher","unstructured":"Wu, C., et al.: Variance reduction for policy gradient with action-dependent factorized baselines (2018). https:\/\/doi.org\/10.48550\/ARXIV.1803.07246","DOI":"10.48550\/ARXIV.1803.07246"},{"key":"37_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.tre.2022.102712","volume":"162","author":"Y Yan","year":"2022","unstructured":"Yan, Y., Chow, A.H., Ho, C.P., Kuo, Y.H., Wu, Q., Ying, C.: Reinforcement learning for logistics and supply chain management: methodologies, state of the art, and future opportunities. Transport. Res. Part E: Logist. Transport. Rev. 162, 102712 (2022). 
https:\/\/doi.org\/10.1016\/j.tre.2022.102712","journal-title":"Transport. Res. Part E: Logist. Transport. Rev."}],"container-title":["Communications in Computer and Information Science","Machine Learning and Principles and Practice of Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-74640-6_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,31]],"date-time":"2025-01-31T08:17:48Z","timestamp":1738311468000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-74640-6_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031746390","9783031746406"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-74640-6_37","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"1 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}