{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T20:35:05Z","timestamp":1777322105079,"version":"3.51.4"},"reference-count":79,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,5,28]],"date-time":"2021-05-28T00:00:00Z","timestamp":1622160000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,5,28]],"date-time":"2021-05-28T00:00:00Z","timestamp":1622160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s00521-021-06129-w","type":"journal-article","created":{"date-parts":[[2021,5,28]],"date-time":"2021-05-28T09:11:28Z","timestamp":1622193088000},"page":"1735-1757","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":29,"title":["Scalable multi-product inventory control with lead time constraints using reinforcement learning"],"prefix":"10.1007","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9014-1098","authenticated-orcid":false,"given":"Hardik","family":"Meisheri","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nazneen N.","family":"Sultana","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mayank","family":"Baranwal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vinita","family":"Baniwal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Somjit","family":"Nath","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Satyam","family":"Verma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Balaraman","family":"Ravindran","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Harshad","family":"Khadilkar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,5,28]]},"reference":[{"issue":"21","key":"6129_CR1","doi-asserted-by":"publisher","first-page":"4445","DOI":"10.1080\/00207540600597138","volume":"44","author":"TF Abdelmaguid","year":"2006","unstructured":"Abdelmaguid TF, Dessouky MM (2006) A genetic algorithm approach to the integrated inventory-distribution problem. Int J Prod Res 44(21):4445\u20134464","journal-title":"Int J Prod Res"},{"issue":"3","key":"6129_CR2","doi-asserted-by":"publisher","first-page":"922","DOI":"10.1016\/j.ejor.2009.01.058","volume":"199","author":"BT Aharon","year":"2009","unstructured":"Aharon BT, Boaz G, Shimrit S (2009) Robust multi-echelon multi-period inventory control. Eur J Oper Res 199(3):922\u2013935","journal-title":"Eur J Oper Res"},{"issue":"1\u20134","key":"6129_CR3","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/s00170-015-6796-9","volume":"79","author":"AA Akbari","year":"2015","unstructured":"Akbari AA, Karimi B (2015) A new robust optimization approach for integrated multi-echelon, multi-product, multi-period supply chain network design under process uncertainty. Int J Adv Manuf Technol 79(1\u20134):229\u2013244","journal-title":"Int J Adv Manuf Technol"},{"key":"6129_CR4","unstructured":"\u00c5str\u00f6m KJ, Wittenmark B (2013) Adaptive control. Courier Corporation"},{"key":"6129_CR5","doi-asserted-by":"crossref","unstructured":"Baniwal V, Kayal C, Shah D, Ma P, Khadilkar H (2019) An imitation learning approach for computing anticipatory picking decisions in retail distribution centres. In: 2019 American control conference (ACC). IEEE, pp 4186\u20134191","DOI":"10.23919\/ACC.2019.8814674"},{"key":"6129_CR6","unstructured":"Barat S, Khadilkar H, Meisheri H, Kulkarni V, Baniwal V, Kumar P, Gajrani M (2019) Actor based simulation for closed loop control of supply chain using reinforcement learning. In: Proceedings of the 18th international conference on autonomous agents and multiAgent systems. International Foundation for Autonomous Agents and Multiagent Systems, pp 1802\u20131804"},{"issue":"3","key":"6129_CR7","doi-asserted-by":"publisher","first-page":"458","DOI":"10.1057\/jors.2010.188","volume":"62","author":"Y Barlas","year":"2011","unstructured":"Barlas Y, Gunduz B (2011) Demand forecasting and sharing strategies to reduce fluctuations and the bullwhip effect in supply chains. J Oper Res Soc 62(3):458\u2013473","journal-title":"J Oper Res Soc"},{"key":"6129_CR8","volume-title":"Dynamic programming and optimal control, chap 6","author":"DP Bertsekas","year":"2005","unstructured":"Bertsekas DP (2005) Dynamic programming and optimal control, chap 6, vol 1. Athena scientific Belmont, MA"},{"key":"6129_CR9","unstructured":"Bertsekas DP, Tsitsiklis JN (1995) Neuro-dynamic programming: an overview. In: Proceedings of 1995 34th IEEE conference on decision and control, vol.\u00a01. IEEE, pp 560\u2013564"},{"issue":"3","key":"6129_CR10","doi-asserted-by":"publisher","first-page":"610","DOI":"10.1287\/opre.2015.1365","volume":"63","author":"D Bertsimas","year":"2015","unstructured":"Bertsimas D, Georghiou A (2015) Design of near optimal decision rules in multistage adaptive mixed-integer optimization. Oper Res 63(3):610\u2013627","journal-title":"Oper Res"},{"key":"6129_CR11","doi-asserted-by":"crossref","unstructured":"Bertsimas D, Thiele A (2004) A robust optimization approach to supply chain management. In: International conference on integer programming and combinatorial optimization. Springer, pp 86\u2013100","DOI":"10.1007\/978-3-540-25960-2_7"},{"issue":"1","key":"6129_CR12","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1287\/opre.1050.0238","volume":"54","author":"D Bertsimas","year":"2006","unstructured":"Bertsimas D, Thiele A (2006) A robust optimization approach to inventory theory. Oper Res 54(1):150\u2013168","journal-title":"Oper Res"},{"key":"6129_CR13","doi-asserted-by":"crossref","unstructured":"Bouabdallah S, Noth A, Siegwart R (2004) Pid vs lq control techniques applied to an indoor micro quadrotor. In: Proceedings of The IEEE international conference on intelligent robots and systems (IROS). IEEE, pp 2451\u20132456","DOI":"10.1109\/IROS.2004.1389776"},{"key":"6129_CR14","doi-asserted-by":"crossref","unstructured":"Cachon G, Fisher M (1997) Campbell soup\u2019s continuous replenishment program: evaluation and enhanced inventory decision rules. Prod Oper Manage 6(3):266\u2013276","DOI":"10.1111\/j.1937-5956.1997.tb00430.x"},{"key":"6129_CR15","unstructured":"Camacho EF, Alba CB (2013) Model predictive control. Springer Science & Business Media"},{"issue":"3","key":"6129_CR16","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1016\/j.ejor.2006.12.004","volume":"184","author":"R Carbonneau","year":"2008","unstructured":"Carbonneau R, Laframboise K, Vahidov R (2008) Application of machine learning techniques for supply chain demand forecasting. Eur J Oper Res 184(3):1140\u20131154","journal-title":"Eur J Oper Res"},{"issue":"5\u20136","key":"6129_CR17","doi-asserted-by":"publisher","first-page":"1911","DOI":"10.1016\/j.apm.2013.09.010","volume":"38","author":"LE C\u00e1rdenas-Barr\u00f3n","year":"2014","unstructured":"C\u00e1rdenas-Barr\u00f3n LE, Trevi\u00f1o-Garza G (2014) An optimal solution to a three echelon supply chain network with multi-product and multi-period. Appl Math Modell 38(5\u20136):1911\u20131918","journal-title":"Appl Math Modell"},{"issue":"2","key":"6129_CR18","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1287\/opre.1090.0698","volume":"58","author":"F Caro","year":"2010","unstructured":"Caro F, Gallien J (2010) Inventory management of a fast-fashion retail network. Oper Res 58(2):257\u2013273","journal-title":"Oper Res"},{"issue":"4","key":"6129_CR19","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1287\/mnsc.6.4.475","volume":"6","author":"AJ Clark","year":"1960","unstructured":"Clark AJ, Scarf H (1960) Optimal policies for a multi-echelon inventory problem. Manage Sci 6(4):475\u2013490","journal-title":"Manage Sci"},{"key":"6129_CR20","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1016\/j.cor.2014.01.013","volume":"47","author":"LC Coelho","year":"2014","unstructured":"Coelho LC, Laporte G (2014) Optimal joint replenishment, delivery and inventory management policies for perishable products. Comput Oper Res 47:42\u201352","journal-title":"Comput Oper Res"},{"issue":"4","key":"6129_CR21","doi-asserted-by":"publisher","first-page":"839","DOI":"10.1016\/j.dss.2011.11.018","volume":"52","author":"C Condea","year":"2012","unstructured":"Condea C, Thiesse F, Fleisch E (2012) Rfid-enabled shelf replenishment with backroom monitoring in retail stores. Decis Supp Syst 52(4):839\u2013849","journal-title":"Decis Supp Syst"},{"issue":"8","key":"6129_CR22","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1109\/9.29425","volume":"34","author":"JC Doyle","year":"1989","unstructured":"Doyle JC, Glover K, Khargonekar PP, Francis BA (1989) State-space solutions to standard h\/sub 2\/and h\/sub infinity\/control problems. IEEE Trans Autom Control 34(8):831\u2013847","journal-title":"IEEE Trans Autom Control"},{"key":"6129_CR23","unstructured":"Duan Y, Andrychowicz M, Stadie B, Ho J, Schneider J, Sutskever I, Abbeel P, Zaremba W (2017) One-shot imitation learning. In: NIPS 31"},{"key":"6129_CR24","unstructured":"Fernie J, Sparks L (2018) Logistics and retail management: emerging issues and new challenges in the retail supply chain. Kogan page publishers"},{"issue":"2","key":"6129_CR25","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1016\/S0925-5273(00)00156-0","volume":"78","author":"I Giannoccaro","year":"2002","unstructured":"Giannoccaro I, Pontrandolfo P (2002) Inventory management in supply chains: a reinforcement learning approach. Int J Prod Econ 78(2):153\u2013161","journal-title":"Int J Prod Econ"},{"issue":"1","key":"6129_CR26","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1287\/trsc.36.1.21.570","volume":"36","author":"GA Godfrey","year":"2002","unstructured":"Godfrey GA, Powell WB (2002) An adaptive dynamic programming algorithm for dynamic fleet management, I: single period travel times. Transp Sci 36(1):21\u201339","journal-title":"Transp Sci"},{"key":"6129_CR27","unstructured":"Golnaraghi MF, Kuo BC (2017) Automatic control systems. McGraw-Hill Education"},{"issue":"3","key":"6129_CR28","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1016\/0009-2509(83)80157-2","volume":"38","author":"TK Gustafsson","year":"1983","unstructured":"Gustafsson TK, Waller KV (1983) Dynamic modeling and reaction invariant control of ph. Chem Eng Sci 38(3):389\u2013398","journal-title":"Chem Eng Sci"},{"key":"6129_CR29","doi-asserted-by":"crossref","unstructured":"Harifi S, Khalilian M, Mohammadzadeh J, Ebrahimnejad S (2020) Optimization in solving inventory control problem using nature inspired emperor penguins colony algorithm. J Intell Manuf 1\u201315","DOI":"10.1007\/s10845-020-01616-8"},{"key":"6129_CR30","doi-asserted-by":"crossref","unstructured":"Harmer J, Gisslen L, del Val J, Holst H, Bergdahl J, Olsson T, Sjoo K, Nordin M (2018) Imitation learning with concurrent actions in 3d games. arXiv preprint arXiv:1803.05402","DOI":"10.1109\/CIG.2018.8490398"},{"key":"6129_CR31","doi-asserted-by":"crossref","unstructured":"Hofmann E, Rutschmann E (2018) Big data analytics and demand forecasting in supply chains: a conceptual analysis. Int J Logist Manage","DOI":"10.1108\/IJLM-04-2017-0088"},{"key":"6129_CR32","volume-title":"Robust adaptive control","author":"PA Ioannou","year":"1996","unstructured":"Ioannou PA, Sun J (1996) Robust adaptive control, vol 1. PTR Prentice-Hall Upper Saddle River, NJ"},{"issue":"3","key":"6129_CR33","doi-asserted-by":"publisher","first-page":"6520","DOI":"10.1016\/j.eswa.2008.07.036","volume":"36","author":"C Jiang","year":"2009","unstructured":"Jiang C, Sheng Z (2009) Case-based reinforcement learning for dynamic inventory control in a multi-agent supply-chain system. Exp Syst Appl 36(3):6520\u20136526","journal-title":"Exp Syst Appl"},{"key":"6129_CR34","unstructured":"Kaggle: Instacart market basket analysis data. https:\/\/www.kaggle.com\/c\/instacart-market-basket-analysis\/data. Accessed Aug 2018"},{"key":"6129_CR35","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1016\/j.eswa.2017.08.046","volume":"91","author":"A Kara","year":"2018","unstructured":"Kara A, Dogan I (2018) Reinforcement learning approaches for specifying ordering policies of perishable inventory systems. Exp Syst Appl 91:150\u2013158","journal-title":"Exp Syst Appl"},{"issue":"2","key":"6129_CR36","doi-asserted-by":"publisher","first-page":"727","DOI":"10.1109\/TITS.2018.2829165","volume":"20","author":"H Khadilkar","year":"2019","unstructured":"Khadilkar H (2019) A scalable reinforcement learning algorithm for scheduling railway lines. IEEE Trans Intell Transp Syst 20(2):727\u2013736","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"6129_CR37","unstructured":"Konda V, Tsitsiklis J (2000) Actor-critic algorithms. In: Advances in neural information processing systems, pp 1008\u20131014"},{"key":"6129_CR38","unstructured":"Kushner HJ, Clark DS (2012) Stochastic approximation methods for constrained and unconstrained systems, vol 26. Springer Science & Business Media"},{"issue":"5","key":"6129_CR39","doi-asserted-by":"publisher","first-page":"1630","DOI":"10.1021\/ie950519h","volume":"35","author":"H Lee","year":"1996","unstructured":"Lee H, Pinto JM, Grossmann IE, Park S (1996) Mixed-integer linear programming model for refinery short-term scheduling of crude oil unloading with inventory management. Indus Eng Chem Res 35(5):1630\u20131641","journal-title":"Indus Eng Chem Res"},{"key":"6129_CR40","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. CoRR arxiv:abs\/1509.02971"},{"issue":"4","key":"6129_CR41","doi-asserted-by":"publisher","first-page":"900","DOI":"10.1109\/TEC.2005.853758","volume":"21","author":"XJ Liu","year":"2006","unstructured":"Liu XJ, Chan C (2006) Neuro-fuzzy generalized predictive control of boiler steam temperature. IEEE Trans Energy Con 21(4):900\u2013908","journal-title":"IEEE Trans Energy Con"},{"issue":"6","key":"6129_CR42","doi-asserted-by":"publisher","first-page":"789","DOI":"10.1016\/S0005-1098(99)00214-9","volume":"36","author":"DQ Mayne","year":"2000","unstructured":"Mayne DQ, Rawlings JB, Rao CV, Scokaert PO (2000) Constrained model predictive control: stability and optimality. Automatica 36(6):789\u2013814","journal-title":"Automatica"},{"issue":"7540","key":"6129_CR43","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep RL. Nature 518(7540):529","journal-title":"Nature"},{"issue":"9\u201312","key":"6129_CR44","doi-asserted-by":"publisher","first-page":"1739","DOI":"10.1007\/s00170-013-5378-y","volume":"70","author":"SM Mousavi","year":"2014","unstructured":"Mousavi SM, Hajipour V, Niaki STA, Aalikar N (2014) A multi-product multi-period inventory control problem under inflation and discount: a parameter-tuned particle swarm optimization algorithm. Int J Adv Manuf Technol 70(9\u201312):1739\u20131756","journal-title":"Int J Adv Manuf Technol"},{"issue":"5","key":"6129_CR45","doi-asserted-by":"publisher","first-page":"582","DOI":"10.1287\/mnsc.40.5.582","volume":"40","author":"S Nahmias","year":"1994","unstructured":"Nahmias S, Smith SA (1994) Optimizing inventory levels in a two-echelon retailer system with partial lost sales. Manage Sci 40(5):582\u2013596","journal-title":"Manage Sci"},{"key":"6129_CR46","doi-asserted-by":"crossref","unstructured":"Ng AY, Coates A, Diel M, Ganapathi V, Schulte J, Tse B, Berger E, Liang E (2006) Autonomous inverted helicopter flight via reinforcement learning. In: Experimental robotics IX. Springer, pp 363\u2013372","DOI":"10.1007\/11552246_35"},{"key":"6129_CR47","unstructured":"Ogata K, Yang Y (2002) Modern control engineering, vol 4. Prentice Hall"},{"issue":"7","key":"6129_CR48","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1002\/nav.10087","volume":"50","author":"KP Papadaki","year":"2003","unstructured":"Papadaki KP, Powell WB (2003) An adaptive dynamic programming algorithm for a stochastic multiproduct batch dispatch problem. Naval Res Logist 50(7):742\u2013769","journal-title":"Naval Res Logist"},{"key":"6129_CR49","doi-asserted-by":"crossref","unstructured":"Powell WB (2007) Approximate dynamic programming: solving the curses of dimensionality, vol 703. John Wiley & Sons","DOI":"10.1002\/9780470182963"},{"issue":"3","key":"6129_CR50","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1007\/BF01797153","volume":"6","author":"JM Proth","year":"1996","unstructured":"Proth JM, Sauer N, Wardi Y, Xie X (1996) Marking optimization of stochastic timed event graphs using ipa. Disc Event Dyn Syst 6(3):221\u2013239","journal-title":"Disc Event Dyn Syst"},{"issue":"24","key":"6129_CR51","doi-asserted-by":"publisher","first-page":"7472","DOI":"10.1080\/00207543.2014.937509","volume":"52","author":"R Qiu","year":"2014","unstructured":"Qiu R, Shang J (2014) Robust optimisation for risk-averse multi-period inventory decision with partial demand distribution information. Int J Prod Res 52(24):7472\u20137495","journal-title":"Int J Prod Res"},{"issue":"1","key":"6129_CR52","first-page":"33","volume":"9","author":"P Radhakrishnan","year":"2009","unstructured":"Radhakrishnan P, Prasad V, Gopalan M (2009) Inventory optimization in supply chain management using genetic algorithm. Int J Comput Sci Netw Sec 9(1):33\u201340","journal-title":"Int J Comput Sci Netw Sec"},{"key":"6129_CR53","unstructured":"Reddi SJ, Kale S, Kumar S (2019) On the convergence of adam and beyond. arXiv preprint arXiv:1904.09237"},{"key":"6129_CR54","unstructured":"Ross S, Bagnell JA (2010) Efficient reductions for imitation learning. In: Proceedings of the international conference artificial intelligence and statistics (AISTATS)"},{"issue":"6","key":"6129_CR55","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1016\/S1364-6613(99)01327-3","volume":"3","author":"S Schaal","year":"1999","unstructured":"Schaal S (1999) Is imitation learning the route to humanoid robots? Trends Cogn Sci 3(6):233\u2013242","journal-title":"Trends Cogn Sci"},{"key":"6129_CR56","unstructured":"Schulman J, Levine S, Abbeel P, Jordan M, Moritz P (2015) Trust region policy optimization. In: International conference on machine learning, pp 1889\u20131897"},{"key":"6129_CR57","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347"},{"issue":"3","key":"6129_CR58","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1287\/opre.1090.0746","volume":"58","author":"CT See","year":"2010","unstructured":"See CT, Sim M (2010) Robust approximation to multiperiod inventory management. Oper Res 58(3):583\u2013594","journal-title":"Oper Res"},{"key":"6129_CR59","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1016\/j.cie.2016.07.022","volume":"99","author":"H Shaabani","year":"2016","unstructured":"Shaabani H, Kamalabadi IN (2016) An efficient population-based simulated annealing algorithm for the multi-product multi-retailer perishable inventory routing problem. Comput Indus Eng 99:189\u2013201","journal-title":"Comput Indus Eng"},{"key":"6129_CR60","unstructured":"Shah D (2020) The six aces to thrive in supply chain 4.0. https:\/\/www.tcs.com\/blogs\/six-aces-to-thrive-in-supply-chain-4-0"},{"key":"6129_CR61","unstructured":"Shalev-Shwartz S, Shammah S, Shashua A (2016) Safe, multi-agent, reinforcement learning for autonomous driving. arXiv preprint arXiv:1610.03295"},{"key":"6129_CR62","unstructured":"Shervais S (2000) Adaptive critic design of control policies for a multi-echelon inventory system. Ph.D. thesis, Portland State University"},{"issue":"2","key":"6129_CR63","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1109\/TSMCA.2003.809214","volume":"33","author":"S Shervais","year":"2003","unstructured":"Shervais S, Shannon TT, Lendaris GG (2003) Intelligent supply chain management using adaptive critic learning. IEEE Trans Syst Man Cybern Part A Syst Humans 33(2):235\u2013244","journal-title":"IEEE Trans Syst Man Cybern Part A Syst Humans"},{"key":"6129_CR64","doi-asserted-by":"crossref","unstructured":"Si J, Barto AG, Powell WB, Wunsch D (2004) Handbook of learning and approximate dynamic programming, vol 2. John Wiley & Sons","DOI":"10.1109\/9780470544785"},{"issue":"7587","key":"6129_CR65","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, Van Den Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M et al (2016) Mastering the game of go with deep neural networks and tree search. Nature 529(7587):484","journal-title":"Nature"},{"issue":"1","key":"6129_CR66","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1057\/jors.1979.8","volume":"30","author":"EA Silver","year":"1979","unstructured":"Silver EA (1979) A simple inventory replenishment decision rule for a linear trend in demand. J Oper Res Soc 30(1):71\u201375","journal-title":"J Oper Res Soc"},{"issue":"4","key":"6129_CR67","doi-asserted-by":"publisher","first-page":"628","DOI":"10.1287\/opre.29.4.628","volume":"29","author":"EA Silver","year":"1981","unstructured":"Silver EA (1981) Operations research in inventory management: a review and critique. Oper Res 29(4):628\u2013645","journal-title":"Oper Res"},{"issue":"1","key":"6129_CR68","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1287\/opre.48.1.50.12443","volume":"48","author":"SA Smith","year":"2000","unstructured":"Smith SA, Agrawal N (2000) Management of multi-item retail inventory systems with demand substitution. Oper Res 48(1):50\u201364","journal-title":"Oper Res"},{"issue":"1","key":"6129_CR69","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1287\/msom.2019.0798","volume":"22","author":"JS Song","year":"2020","unstructured":"Song JS, van Houtum GJ, Van Mieghem JA (2020) Capacity and inventory management: Review, trends, and projections. Manuf Serv Oper Manage 22(1):36\u201346","journal-title":"Manuf Serv Oper Manage"},{"key":"6129_CR70","doi-asserted-by":"crossref","unstructured":"Tavakoli A, Pardo F, Kormushev P (2018) Action branching architectures for deep reinforcement learning. In: Thirty-second AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v32i1.11798"},{"issue":"1","key":"6129_CR71","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1287\/ijoc.1040.0079","volume":"18","author":"H Topaloglu","year":"2006","unstructured":"Topaloglu H, Powell W (2006) Dynamic-programming approximations for stochastic time-staged integer multicommodity-flow problems. INFORMS J Comput 18(1):31\u201342","journal-title":"INFORMS J Comput"},{"key":"6129_CR72","unstructured":"Utkin V, Guldner J, Shi J (2009) Sliding mode control in electro-mechanical systems. CRC Press"},{"key":"6129_CR73","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. In: AAAI, vol\u00a02. Phoenix, AZ, p\u00a05","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"6129_CR74","doi-asserted-by":"publisher","unstructured":"Van Roy B, Bertsekas DP, Lee Y, Tsitsiklis JN (1997) A neuro-dynamic programming approach to retailer inventory management. In: Proceedings of the 36th IEEE conference on decision and control, vol\u00a04, pp 4052\u20134057. https:\/\/doi.org\/10.1109\/CDC.1997.652501","DOI":"10.1109\/CDC.1997.652501"},{"key":"6129_CR75","unstructured":"Verma R, Saikia S, Khadilkar H, Agarwal P, Srinivasan A, Shroff G (2019) An RL framework for container selection and ship load sequencing in ports. In: International conference on autonomous agents and multi agent systems"},{"key":"6129_CR76","unstructured":"Visentin A, Prestwich S, Rossi R, Tarim SA (2021) Computing optimal (r, s, s) policy parameters by a hybrid of branch-and-bound and stochastic dynamic programming. Eur J Oper Res"},{"key":"6129_CR77","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1016\/j.ijpe.2017.04.010","volume":"189","author":"L Yang","year":"2017","unstructured":"Yang L, Li H, Campbell JF, Sweeney DC (2017) Integrated multi-period dynamic inventory classification and control. Int J Prod Econ 189:86\u201396","journal-title":"Int J Prod Econ"},{"key":"6129_CR78","unstructured":"Zhang W, Dietterich T (1995) A reinforcement learning approach to job-shop scheduling. In: International joint conference on artificial intelligence. Montreal, Canada"},{"key":"6129_CR79","unstructured":"Zipkin P (2000) Foundations of inventory management. McGraw-Hill Companies, Incorporated. https:\/\/books.google.co.in\/books?id=rjzbkQEACAAJ"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-021-06129-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-021-06129-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-021-06129-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,2]],"date-time":"2023-02-02T20:10:36Z","timestamp":1675368636000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-021-06129-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,28]]},"references-count":79,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["6129"],"URL":"https:\/\/doi.org\/10.1007\/s00521-021-06129-w","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,5,28]]},"assertion":[{"value":"16 November 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 May 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 May 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interests"}}]}}