{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T03:03:41Z","timestamp":1776222221012,"version":"3.50.1"},"reference-count":66,"publisher":"Informa UK Limited","issue":"24","license":[{"start":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T00:00:00Z","timestamp":1754438400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"Australian Research Council Industrial Transformation Training Centre in Optimisation Technologies, Integrated Methodologies, and Applications","award":["IC200100009"],"award-info":[{"award-number":["IC200100009"]}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["International Journal of Production Research"],"published-print":{"date-parts":[[2025,12,17]]},"DOI":"10.1080\/00207543.2025.2520596","type":"journal-article","created":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T10:46:13Z","timestamp":1754477173000},"page":"9571-9592","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":1,"title":["Inventory replenishment and fulfilment decisions for an omnichannel retailer: a reinforcement learning-based method"],"prefix":"10.1080","volume":"63","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0914-9808","authenticated-orcid":false,"given":"Maryam","family":"Kolyaei","sequence":"first","affiliation":[{"name":"The University of Melbourne","place":["Parkville, Victoria, Australia"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0527-7426","authenticated-orcid":false,"given":"Lele","family":"Zhang","sequence":"additional","affiliation":[{"name":"The University of Melbourne","place":["Parkville, Victoria, Australia"]}]},{"given":"Michelle","family":"Blom","sequence":"additional","affiliation":[{"name":"The University of Melbourne","place":["Parkville, Victoria, Australia"]}]}],"member":"301","published-online":{"date-parts":[[2025,8,6]]},"reference":[{"key":"e_1_3_4_2_1","doi-asserted-by":"crossref","unstructured":"Aboelrous Abdelrahman Adriana F. Gabor and Yingqian Zhang. 2022. \u201cJoint Inventory and Fulfilment Optimization for an Omnichannel Retailer: A Stochastic Optimization Approach.\u201d Available at SSRN 4110440.","DOI":"10.2139\/ssrn.4110440"},{"key":"e_1_3_4_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2021.08.004"},{"key":"e_1_3_4_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2020.04.016"},{"key":"e_1_3_4_5_1","unstructured":"Baird Nikki and Brian Kilcourse. March 2011. Omnichannel Fulfillment and the Future of the Retail Supply Chain. Benchmark Report."},{"key":"e_1_3_4_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2024.02.027"},{"key":"e_1_3_4_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2020.01.011"},{"issue":"5","key":"e_1_3_4_8_1","first-page":"679","article-title":"A Markovian Decision Process","volume":"6","author":"Bellman Richard.","year":"1957","unstructured":"Bellman, Richard. 1957. \u201cA Markovian Decision Process.\u201d Journal of Mathematics and Mechanics 6 (5): 679\u2013684.","journal-title":"Journal of Mathematics and Mechanics"},{"key":"e_1_3_4_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0305-0548(03)00102-3"},{"key":"e_1_3_4_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2021.07.016"},{"key":"e_1_3_4_11_1","volume-title":"Time Series Analysis: Forecasting and Control","author":"Box George E. P.","year":"2015","unstructured":"Box, George E. P., and Gwilym M. Jenkins. 2015. Time Series Analysis: Forecasting and Control. Fifth edition. John Wiley & Sons."},{"key":"e_1_3_4_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13676-018-0116-0"},{"key":"e_1_3_4_13_1","unstructured":"Brightpearl Multichannel Merchant. 2017. \u201cWhich of the following Best Describe How Your Company Views Omnichannel? [Graph].\u201d Accessed June 17 2022. https:\/\/www.statista.com\/statistics\/1115118\/status-of-omnichannel-strategy-of-retailers-worldwide\/."},{"key":"e_1_3_4_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dss.2008.03.007"},{"key":"e_1_3_4_15_1","doi-asserted-by":"publisher","DOI":"10.1287\/msom.2022.1164"},{"key":"e_1_3_4_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2021.10.045"},{"key":"e_1_3_4_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.elerap.2020.100993"},{"key":"e_1_3_4_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2021.01.007"},{"key":"e_1_3_4_19_1","doi-asserted-by":"publisher","DOI":"10.1111\/exsy.v32.1"},{"key":"e_1_3_4_20_1","doi-asserted-by":"publisher","DOI":"10.1561\/2200000071"},{"key":"e_1_3_4_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20119-7"},{"key":"e_1_3_4_22_1","first-page":"1","article-title":"Multi-echelon Inventory Optimization Using Deep Reinforcement Learning","volume":"32","author":"Geevers Kevin","year":"2023","unstructured":"Geevers, Kevin, Lotte van Hezewijk, and Martijn R. K. Mes. 2023. \u201cMulti-echelon Inventory Optimization Using Deep Reinforcement Learning.\u201d Central European Journal of Operations Research 32:1\u201331.","journal-title":"Central European Journal of Operations Research"},{"key":"e_1_3_4_23_1","doi-asserted-by":"publisher","DOI":"10.1287\/msom.2021.1064"},{"key":"e_1_3_4_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cor.2021.105647"},{"key":"e_1_3_4_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2023.06.018"},{"key":"e_1_3_4_26_1","doi-asserted-by":"crossref","unstructured":"Gokhale Apoorva Chirag Trasikar Ankit Shah Arpita Hegde and Sowmiya Raksha Naik. 2021. \u201cA Reinforcement Learning Approach to Inventory Management.\u201d In Advances in Artificial Intelligence and Data Engineering: Select Proceedings of AIDE 2019 281\u2013297. Springer.","DOI":"10.1007\/978-981-15-3514-7_23"},{"key":"e_1_3_4_27_1","doi-asserted-by":"publisher","DOI":"10.1002\/nav.v68.6"},{"key":"e_1_3_4_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2014.11.009"},{"key":"e_1_3_4_29_1","doi-asserted-by":"publisher","DOI":"10.1080\/01966324.1998.10737465"},{"key":"e_1_3_4_30_1","doi-asserted-by":"publisher","DOI":"10.1504\/IJLSM.2023.134404"},{"key":"e_1_3_4_31_1","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2018.1839"},{"key":"e_1_3_4_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2018.07.047"},{"key":"e_1_3_4_33_1","doi-asserted-by":"crossref","unstructured":"Jalilipour Alishah Elnaz Kamran Moinzadeh and Yong-Pin Zhou. 2015. \u201cInventory Fulfillment Strategies for an Omnichannel Retailer.\u201d SSRN.","DOI":"10.2139\/ssrn.2659671"},{"key":"e_1_3_4_34_1","unstructured":"Jalilipour Alishah Elnaz Kamran Moinzadeh and Yong-Pin Zhou. 2017. Store Fulfillment Strategy for an Omnichannel Retailer. Technical Report. Microsoft Corporation Redmond WA."},{"key":"e_1_3_4_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tre.2021.102550"},{"key":"e_1_3_4_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2017.08.046"},{"key":"e_1_3_4_37_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijpe.2023.109088"},{"key":"e_1_3_4_38_1","doi-asserted-by":"publisher","DOI":"10.3390\/a14080240"},{"key":"e_1_3_4_39_1","doi-asserted-by":"publisher","DOI":"10.1111\/jbl.v43.4"},{"key":"e_1_3_4_40_1","first-page":"1789","article-title":"Collaborative Multiagent Reinforcement Learning by Payoff Propagation","volume":"7","author":"Kok Jelle R.","year":"2006","unstructured":"Kok, Jelle R., and Nikos Vlassis. 2006. \u201cCollaborative Multiagent Reinforcement Learning by Payoff Propagation.\u201d Journal of Machine Learning Research 7:1789\u20131828.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_4_41_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2020.1762019"},{"key":"e_1_3_4_42_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trb.2022.07.005"},{"key":"e_1_3_4_43_1","doi-asserted-by":"publisher","DOI":"10.3390\/jtaer17020025"},{"key":"e_1_3_4_44_1","doi-asserted-by":"crossref","unstructured":"Louw Cobus Louwrens Labuschagne and Tiffany Woodley. 2022. \u201cA Comparison of Reinforcement Learning Agents Applied to Traffic Signal Optimisation.\u201d In SUMO Conference Proceedings Vol. 3 15\u201343.","DOI":"10.52825\/scp.v3i.116"},{"key":"e_1_3_4_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2021.107353"},{"key":"e_1_3_4_46_1","doi-asserted-by":"publisher","DOI":"10.3390\/math8101640"},{"key":"e_1_3_4_47_1","doi-asserted-by":"publisher","DOI":"10.3390\/pr9010102"},{"key":"e_1_3_4_48_1","doi-asserted-by":"publisher","DOI":"10.3390\/su14105903"},{"key":"e_1_3_4_49_1","volume-title":"Warehouse Management: A Complete Guide to Improving Efficiency and Minimizing Costs in the Modern Warehouse","author":"Richards Gwynne.","year":"2017","unstructured":"Richards, Gwynne. 2017. Warehouse Management: A Complete Guide to Improving Efficiency and Minimizing Costs in the Modern Warehouse. Kogan Page Publishers."},{"key":"e_1_3_4_50_1","unstructured":"RIS News. 2020. \u201cRetail Sales Share Forecast in the United States for 2020 by Channel [Graph].\u201d Accessed June 26 2024 from Statista. https:\/\/www.statista.com\/statistics\/1094319\/retail-sales-share-forecast-by-channel\/."},{"key":"e_1_3_4_51_1","doi-asserted-by":"publisher","DOI":"10.1080\/09593969.2022.2089903"},{"key":"e_1_3_4_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-022-01713-5"},{"key":"e_1_3_4_53_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2022.2140221"},{"key":"e_1_3_4_54_1","doi-asserted-by":"publisher","DOI":"10.1108\/IJPDLM-05-2018-361"},{"key":"e_1_3_4_55_1","unstructured":"Schulman John Filip Wolski Prafulla Dhariwal Alec Radford and Oleg Klimov. 2017. \u201cProximal Policy Optimization Algorithms.\u201d arXiv preprint arXiv:1707.06347."},{"key":"e_1_3_4_56_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijpe.2013.09.014"},{"key":"e_1_3_4_57_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijpe.2023.109099"},{"key":"e_1_3_4_58_1","unstructured":"Stranieri Francesco and Fabio Stella. 2022. \u201cComparing Deep Reinforcement Learning Algorithms in Two-Echelon Supply Chains.\u201d arXiv e-prints arXiv\u20132204."},{"key":"e_1_3_4_59_1","first-page":"1","article-title":"Performance of Deep Reinforcement Learning Algorithms in Two-Echelon Inventory Control Systems","author":"Stranieri Francesco","year":"2024","unstructured":"Stranieri, Francesco, Fabio Stella, and Chaaben Kouki. 2024. \u201cPerformance of Deep Reinforcement Learning Algorithms in Two-Echelon Inventory Control Systems.\u201d International Journal of Production Research 1\u201316.","journal-title":"International Journal of Production Research"},{"key":"e_1_3_4_60_1","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton Richard S.","year":"2018","unstructured":"Sutton, Richard S., and Andrew G. Barto. 2018. Reinforcement Learning: An Introduction. MIT press."},{"key":"e_1_3_4_61_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tre.2015.04.004"},{"key":"e_1_3_4_62_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2022.2056540"},{"key":"e_1_3_4_63_1","doi-asserted-by":"crossref","unstructured":"Van Otterlo Martijn and Marco Wiering. 2012. \u201cReinforcement Learning and Markov Decision Processes.\u201d In Reinforcement Learning: State-of-the-art 3\u201342. Springer.","DOI":"10.1007\/978-3-642-27645-3_1"},{"key":"e_1_3_4_64_1","doi-asserted-by":"publisher","DOI":"10.1080\/0965254X.2021.1892163"},{"issue":"9","key":"e_1_3_4_65_1","first-page":"1","article-title":"Transforming Supply Chains through AI: Demand Forecasting, Inventory Management, and Dynamic Optimization","volume":"1","author":"Verma Pradeep.","year":"2024","unstructured":"Verma, Pradeep. 2024. \u201cTransforming Supply Chains through AI: Demand Forecasting, Inventory Management, and Dynamic Optimization.\u201d Integrated Journal of Science and Technology 1 (9): 1\u201314.","journal-title":"Integrated Journal of Science and Technology"},{"key":"e_1_3_4_66_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tre.2022.102712"},{"key":"e_1_3_4_67_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.116564"}],"container-title":["International Journal of Production Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/00207543.2025.2520596","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T10:46:32Z","timestamp":1765277192000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/00207543.2025.2520596"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,6]]},"references-count":66,"journal-issue":{"issue":"24","published-print":{"date-parts":[[2025,12,17]]}},"alternative-id":["10.1080\/00207543.2025.2520596"],"URL":"https:\/\/doi.org\/10.1080\/00207543.2025.2520596","relation":{},"ISSN":["0020-7543","1366-588X"],"issn-type":[{"value":"0020-7543","type":"print"},{"value":"1366-588X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,6]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tprs20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tprs20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2024-11-01","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-05-11","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-08-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}