{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T16:14:38Z","timestamp":1756311278799,"version":"3.40.3"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031434266"},{"type":"electronic","value":"9783031434273"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43427-3_37","type":"book-chapter","created":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T21:01:41Z","timestamp":1694898101000},"page":"619-634","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Cooperative Multi-agent Reinforcement Learning for\u00a0Inventory Management"],"prefix":"10.1007","author":[{"given":"Madhav","family":"Khirwar","sequence":"first","affiliation":[]},{"given":"Karthik S.","family":"Gurumoorthy","sequence":"additional","affiliation":[]},{"given":"Ankit Ajit","family":"Jain","sequence":"additional","affiliation":[]},{"given":"Shantala","family":"Manchenahally","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,17]]},"reference":[{"issue":"6","key":"37_CR1","doi-asserted-by":"publisher","first-page":"4464","DOI":"10.1016\/j.apm.2012.09.005","volume":"37","author":"N Anbazhagan","year":"2013","unstructured":"Anbazhagan, N., Wang, J., Gomathi, D.: Base stock policy with retrial demands. Appl. Math. Model. 37(6), 4464\u20134473 (2013)","journal-title":"Appl. Math. Model."},{"issue":"4","key":"37_CR2","doi-asserted-by":"publisher","first-page":"1316","DOI":"10.3182\/20090603-3-RU-2001.0561","volume":"42","author":"A D\u2019Atri","year":"2009","unstructured":"D\u2019Atri, A., et al.: From supply chains to supply networks: The beer game evolution. IFAC Proc. Volumes 42(4), 1316\u20131321 (2009)","journal-title":"IFAC Proc. Volumes"},{"key":"37_CR3","unstructured":"Ding, Y., et al.: Multi-agent reinforcement learning with shared resource in inventory management. CoRR abs\/2212.07684 (2022)"},{"key":"37_CR4","unstructured":"Farquhar, G., Gustafson, L., Lin, Z., Whiteson, S., Usunier, N., Synnaeve, G.: Growing action spaces. In: Proceedings of the 37th International Conference on Machine Learning, vol. 119, pp. 3040\u20133051. PMLR (2020)"},{"issue":"2","key":"37_CR5","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1016\/S0925-5273(00)00156-0","volume":"78","author":"I Giannoccaro","year":"2002","unstructured":"Giannoccaro, I., Pontrandolfo, P.: Inventory management in supply chains: a reinforcement learning approach. Int. J. Prod. Econ. 78(2), 153\u2013161 (2002)","journal-title":"Int. J. Prod. Econ."},{"key":"37_CR6","unstructured":"Goodfellow, I.J., Mirza, M., Xiao, D., Courville, A., Bengio, Y.: An empirical investigation of catastrophic forgetting in gradient-based neural networks. arXiv preprint arXiv:1312.6211 (2013)"},{"key":"37_CR7","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1007\/s10994-012-5322-7","volume":"90","author":"T Hester","year":"2013","unstructured":"Hester, T., Stone, P.: Texplore: real-time sample-efficient reinforcement learning for robots. Mach. Learn. 90, 385\u2013429 (2013)","journal-title":"Mach. Learn."},{"key":"37_CR8","unstructured":"Hubbs, C.D., Perez, H.D., Sarwar, O., Sahinidis, N.V., Grossmann, I.E., Wassick, J.M.: Or-gym: a reinforcement learning library for operations research problem. CoRR abs\/2008.06319 (2020)"},{"key":"37_CR9","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1016\/j.eswa.2017.08.046","volume":"91","author":"A Kara","year":"2018","unstructured":"Kara, A., Dogan, I.: Reinforcement learning approaches for specifying ordering policies of perishable inventory systems. Expert Syst. Appl. 91, 150\u2013158 (2018)","journal-title":"Expert Syst. Appl."},{"key":"37_CR10","unstructured":"Konda, V., Tsitsiklis, J.: Actor-critic algorithms. In: Advances in Neural Information Processing Systems, vol. 12 (1999)"},{"key":"37_CR11","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"key":"37_CR12","unstructured":"Lowe, R., Wu, Y.I., Tamar, A., Harb, J., Pieter Abbeel, O., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"issue":"3","key":"37_CR13","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1007\/s00521-021-06129-w","volume":"34","author":"H Meisheri","year":"2022","unstructured":"Meisheri, H., et al.: Scalable multi-product inventory control with lead time constraints using reinforcement learning. Neural Comput. Appl. 34(3), 1735\u20131757 (2022)","journal-title":"Neural Comput. Appl."},{"key":"37_CR14","doi-asserted-by":"publisher","first-page":"486","DOI":"10.1007\/1-4020-0611-X_582","volume-title":"Encyclopedia of Operations Research and Management Science","author":"DR Miller","year":"2001","unstructured":"Miller, D.R.: Markov processes. In: Gass, S.I., Harris, C.M. (eds.) Encyclopedia of Operations Research and Management Science, pp. 486\u2013490. Springer, New York (2001). https:\/\/doi.org\/10.1007\/1-4020-0611-X_582"},{"key":"37_CR15","doi-asserted-by":"crossref","unstructured":"Mittal, M., Shah, N.H.: Optimal Inventory Control and Management Techniques. IGI Global, Hershey (2016)","DOI":"10.4018\/978-1-4666-9888-8"},{"key":"37_CR16","unstructured":"NVIDIA, Vingelmann, P., Fitzek, F.H.: Cuda, release: 10.2.89 (2020). https:\/\/developer.nvidia.com\/cuda-toolkit"},{"key":"37_CR17","unstructured":"Okuta, R., Unno, Y., Nishino, D., Hido, S., Loomis, C.: Cupy: a numpy-compatible library for NVIDIA GPU calculations. In: Proceedings of Workshop on Machine Learning Systems (LearningSys) in the Thirty-first Annual Conference on Neural Information Processing Systems (NIPS) (2017)"},{"key":"37_CR18","unstructured":"Omidshafiei, S., Pazis, J., Amato, C., How, J.P., Vian, J.: Deep decentralized multi-task multi-agent reinforcement learning under partial observability. In: International Conference on Machine Learning, pp. 2681\u20132690. PMLR (2017)"},{"issue":"1","key":"37_CR19","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1287\/msom.2020.0939","volume":"24","author":"A Oroojlooyjadid","year":"2022","unstructured":"Oroojlooyjadid, A., Nazari, M., Snyder, L.V., Tak\u00e1\u010d, M.: A deep q-network for the beer game: deep reinforcement learning for inventory optimization. Manuf. Serv. Oper. Manag. 24(1), 285\u2013304 (2022)","journal-title":"Manuf. Serv. Oper. Manag."},{"key":"37_CR20","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1007\/s10458-005-2631-2","volume":"11","author":"L Panait","year":"2005","unstructured":"Panait, L., Luke, S.: Cooperative multi-agent learning: the state of the art. Auton. Agent. Multi-Agent Syst. 11, 387\u2013434 (2005)","journal-title":"Auton. Agent. Multi-Agent Syst."},{"key":"37_CR21","doi-asserted-by":"crossref","unstructured":"Peng, Z., Zhang, Y., Feng, Y., Zhang, T., Wu, Z., Su, H.: Deep reinforcement learning approach for capacitated supply chain optimization under demand uncertainty. In: 2019 Chinese Automation Congress (CAC), pp. 3512\u20133517. IEEE (2019)","DOI":"10.1109\/CAC48633.2019.8997498"},{"key":"37_CR22","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: Proceedings of the 32nd International Conference on Machine Learning, vol. 37, pp. 1889\u20131897. PMLR (2015)"},{"key":"37_CR23","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"37_CR24","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic policy gradient algorithms. In: Proceedings of the 31st International Conference on Machine Learning, vol. 32, pp. 387\u2013395. PMLR (2014)"},{"key":"37_CR25","unstructured":"Stranieri, F., Stella, F.: A deep reinforcement learning approach to supply chain inventory management. CoRR abs\/2204.09603 (2022)"},{"key":"37_CR26","unstructured":"Sultana, N.N., Meisheri, H., Baniwal, V., Nath, S., Ravindran, B., Khadilkar, H.: Reinforcement learning for multi-product multi-node inventory management in supply chains. CoRR abs\/2006.04037 (2020)"},{"key":"37_CR27","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"37_CR28","doi-asserted-by":"crossref","unstructured":"Tavakoli, A., Pardo, F., Kormushev, P.: Action branching architectures for deep reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.11798"},{"key":"37_CR29","unstructured":"Yang, Y., et al.: Q-value path decomposition for deep multiagent reinforcement learning. In: Proceedings of the 37th International Conference on Machine Learning, vol. 119, pp. 10706\u201310715. PMLR (2020)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Applied Data Science and Demo Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43427-3_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T21:06:04Z","timestamp":1694898364000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43427-3_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434266","9783031434273"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43427-3_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"We do not foresee any scenario where our work would put a certain demographic or a specific organization to a systematic disadvantage. We do not use any personal data in our experiments. We do not anticipate our algorithm to be used in policing or military applications. Our work is only focused on developing a RL-based solution to effectively manage inventory and optimize costs in a multi-echelon supply chain system.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Impact"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}