{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T05:33:23Z","timestamp":1781069603608,"version":"3.54.1"},"reference-count":39,"publisher":"Informa UK Limited","issue":"17","content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["International Journal of Production Research"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1080\/00207543.2024.2311180","type":"journal-article","created":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T16:42:20Z","timestamp":1709311340000},"page":"6211-6226","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":16,"title":["Performance of deep reinforcement learning algorithms in two-echelon inventory control systems"],"prefix":"10.1080","volume":"62","author":[{"given":"Francesco","family":"Stranieri","sequence":"first","affiliation":[{"name":"Department of Informatics, Systems, and Communication (DISCo), University of Milano-Bicocca, Milan, Italy"},{"name":"Department of Control and Computer Engineering (DAUIN), Polytechnic of Turin, Turin, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fabio","family":"Stella","sequence":"additional","affiliation":[{"name":"Department of Informatics, Systems, and Communication (DISCo), University of Milano-Bicocca, Milan, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chaaben","family":"Kouki","sequence":"additional","affiliation":[{"name":"Department of Operations Management and Decision Science (OMDS), ESSCA School of Management, Angers, France"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"301","published-online":{"date-parts":[[2024,3]]},"reference":[{"key":"e_1_3_4_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59747-4_38"},{"key":"e_1_3_4_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2021.07.016"},{"key":"e_1_3_4_4_1","unstructured":"Brockman G. V. Cheung L. Pettersson J. Schneider J. Schulman J. Tang and W. Zaremba. 2016. \u201cOpenai Gym.\u201d https:\/\/arxiv.org\/abs\/1606.01540."},{"key":"e_1_3_4_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dss.2008.03.007"},{"key":"e_1_3_4_6_1","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2015.1386"},{"key":"e_1_3_4_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2023.10.007"},{"key":"e_1_3_4_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2018.02.047"},{"key":"e_1_3_4_9_1","doi-asserted-by":"publisher","DOI":"10.1561\/2200000071"},{"key":"e_1_3_4_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10100-023-00872-2"},{"key":"e_1_3_4_11_1","doi-asserted-by":"publisher","DOI":"10.1287\/msom.2021.1064"},{"key":"e_1_3_4_12_1","unstructured":"Gordon G. J. and T. M. Mitchell. 1999. \u201cApproximate Solutions to Markov Decision Processes\u201d."},{"key":"e_1_3_4_13_1","unstructured":"Hubbs C. D. H. D. Perez O. Sarwar N. V. Sahinidis I. E. Grossmann and J. M. Wassick. 2020. \u201cOr-Gym: A Reinforcement Learning Library for Operations Research Problems.\u201d https:\/\/arxiv.org\/abs\/2008.06319."},{"key":"e_1_3_4_14_1","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.1080.0945"},{"key":"e_1_3_4_15_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"e_1_3_4_16_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2023.2276811"},{"key":"e_1_3_4_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijpe.2023.109088"},{"key":"e_1_3_4_18_1","unstructured":"Kemmer L. H. von Kleist D. de Rochebou\u00ebt N. Tziortziotis and J. Read. 2018. \u201cReinforcement Learning for Supply Chain Optimization.\u201d In European Workshop on Reinforcement Learning Vol. 14.\u00a0Lille France."},{"key":"e_1_3_4_19_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207540701348779"},{"key":"e_1_3_4_20_1","unstructured":"Mnih V. A. P. Badia M. Mirza A. Graves T. Harley T. P. Lillicrap D. Silver and K. Kavukcuoglu. 2016. \u201cAsynchronous Methods for Deep Reinforcement Learning.\u201d In Proceedings of the 33rd International Conference on International Conference on Machine Learning -- Volume 48 ICML'16 1928\u20131937. JMLR.org."},{"key":"e_1_3_4_21_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"e_1_3_4_22_1","unstructured":"Moritz P. R. Nishihara S. Wang A. Tumanov R. Liaw E. Liang M. Elibol et\u00a0al. 2018 October. \u201cRay: A Distributed Framework for Emerging AI Applications.\u201d In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18) 561\u2013577 Carlsbad CA: USENIX Association. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/moritz."},{"key":"e_1_3_4_23_1","doi-asserted-by":"publisher","DOI":"10.1287\/msom.2020.0939"},{"key":"e_1_3_4_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CAC48633.2019.8997498"},{"key":"e_1_3_4_25_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2020.1735666"},{"key":"e_1_3_4_26_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2022.2140221"},{"key":"e_1_3_4_27_1","unstructured":"Schulman J. S. Levine P. Abbeel M. Jordan and P. Moritz. 2015 07\u201309 July. \u201cTrust Region Policy Optimization.\u201d In Proceedings of the 32nd International Conference on Machine Learning Vol. 37 of Proceedings of Machine Learning Research edited by F. Bach and D. Blei 1889\u20131897. Lille France: PMLR. https:\/\/proceedings.mlr.press\/v37\/schulman15.html."},{"key":"e_1_3_4_28_1","unstructured":"Schulman J. F. Wolski P. Dhariwal A. Radford and O. Klimov. 2017. \u201cProximal Policy Optimization Algorithms.\u201d https:\/\/arxiv.org\/abs\/1707.06347."},{"key":"e_1_3_4_29_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2021.1901153"},{"key":"e_1_3_4_30_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2022.2029611"},{"key":"e_1_3_4_31_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"e_1_3_4_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijpe.2023.109099"},{"key":"e_1_3_4_33_1","unstructured":"Stranieri F. and F. Stella. 2022. \u201cA Deep Reinforcement Learning Approach to Supply Chain Inventory Management.\u201d https:\/\/arxiv.org\/abs\/2204.09603."},{"key":"e_1_3_4_34_1","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton R. S.","year":"2018","unstructured":"Sutton, R. S., and A. G. Barto. 2018. Reinforcement Learning: An Introduction. Cambridge: MIT Press."},{"key":"e_1_3_4_35_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2022.2056540"},{"key":"e_1_3_4_36_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"e_1_3_4_37_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2021.2020927"},{"key":"e_1_3_4_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/bf00992696"},{"key":"e_1_3_4_39_1","unstructured":"Wu C. A. Rajeswaran Y. Duan V. Kumar A. M. Bayen S. Kakade I. Mordatch and P. Abbeel. 2018. \u201cVariance Reduction for Policy Gradient with Action-Dependent Factorized Baselines.\u201d https:\/\/arxiv.org\/abs\/1803.07246."},{"key":"e_1_3_4_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tre.2022.102712"}],"container-title":["International Journal of Production Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/00207543.2024.2311180","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,24]],"date-time":"2024-09-24T19:33:31Z","timestamp":1727206411000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/00207543.2024.2311180"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3]]},"references-count":39,"journal-issue":{"issue":"17","published-print":{"date-parts":[[2024,9]]}},"alternative-id":["10.1080\/00207543.2024.2311180"],"URL":"https:\/\/doi.org\/10.1080\/00207543.2024.2311180","relation":{},"ISSN":["0020-7543","1366-588X"],"issn-type":[{"value":"0020-7543","type":"print"},{"value":"1366-588X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tprs20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tprs20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2023-05-11","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-01-19","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-03-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}