{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T07:12:32Z","timestamp":1780470752796,"version":"3.54.1"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Evol. Intel."],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s12065-026-01215-1","type":"journal-article","created":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T06:21:09Z","timestamp":1780467669000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement learning for smart warehouse systems using transformer-augmented double deep Q-networks"],"prefix":"10.1007","volume":"19","author":[{"given":"Hitesh Reddy","family":"Dereddy","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sankeerth","family":"Latheesh","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kanav Jeet","family":"Singh","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Narayan C.","family":"Debnath","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Garima","family":"Aggarwal","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ngoc Huan","family":"Le","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Van Luan","family":"Tran","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,6,3]]},"reference":[{"issue":"6","key":"1215_CR1","doi-asserted-by":"publisher","first-page":"629","DOI":"10.1287\/mnsc.22.6.629","volume":"22","author":"WH Hausman","year":"1976","unstructured":"Hausman WH, Schwarz LB, Graves SC (1976) Optimal storage assignment in automatic warehousing systems. Manage Sci 22(6):629\u2013638. https:\/\/doi.org\/10.1287\/mnsc.22.6.629","journal-title":"Manage Sci"},{"key":"1215_CR2","unstructured":"Bartholdi JJ, Hackman ST (2016) Warehouse & distribution science: Release 0.96. Georgia Institute of Technology, Atlanta, GA, USA"},{"issue":"9","key":"1215_CR3","doi-asserted-by":"publisher","first-page":"1865","DOI":"10.1080\/00207540110028128","volume":"39","author":"KJ Roodbergen","year":"2001","unstructured":"Roodbergen KJ, De Koster R (2001) Routing methods for warehouses with multiple cross aisles. Int J Prod Res 39(9):1865\u20131883. https:\/\/doi.org\/10.1080\/00207540110028128","journal-title":"Int J Prod Res"},{"issue":"3","key":"1215_CR4","doi-asserted-by":"publisher","first-page":"539","DOI":"10.1016\/j.ejor.2009.07.031","volume":"203","author":"J Gu","year":"2010","unstructured":"Gu J, Goetschalckx M, McGinnis LF (2010) Research on warehouse operation: A comprehensive review. Eur J Oper Res 203(3):539\u2013549. https:\/\/doi.org\/10.1016\/j.ejor.2009.07.031","journal-title":"Eur J Oper Res"},{"issue":"1","key":"1215_CR5","first-page":"27","volume":"3","author":"JL Heskett","year":"1963","unstructured":"Heskett JL (1963) Cube-per-order index: A key to warehouse stock location. Transp Distrib Manage 3(1):27\u201331","journal-title":"Transp Distrib Manage"},{"key":"1215_CR6","unstructured":"Yang P, Zhao L, Ferreira MAM (2021) A simulation-based approach for slotting optimization in high-throughput warehouses. Proc. Int. Conf. Ind. Eng. Oper. Manage. pp 123\u2013130"},{"key":"1215_CR7","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1016\/j.cie.2016.12.020","volume":"105","author":"B Men\u00e9ndez","year":"2017","unstructured":"Men\u00e9ndez B, Pardo EG, Alonso-Ayuso A, Duarte A, Molina F (2017) Variable neighborhood search strategies for the warehouse slotting problem. Comput Ind Eng 105:189\u2013200. https:\/\/doi.org\/10.1016\/j.cie.2016.12.020","journal-title":"Comput Ind Eng"},{"issue":"2","key":"1215_CR8","doi-asserted-by":"publisher","first-page":"491","DOI":"10.1007\/s10100-016-0452-7","volume":"25","author":"A Scholz","year":"2017","unstructured":"Scholz A, W\u00e4scher G (2017) Order batching and picker routing in manual order picking systems: The benefits of integrated optimization. Cent Eur J Oper Res 25(2):491\u2013511. https:\/\/doi.org\/10.1007\/s10100-016-0452-7","journal-title":"Cent Eur J Oper Res"},{"key":"1215_CR9","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1016\/j.cor.2017.06.004","volume":"87","author":"AH Schrotenboer","year":"2017","unstructured":"Schrotenboer AH, Ursavas E, Vis C (2017) A branch-and-price-and-cut algorithm for warehouse storage assignment. Comput Oper Res 87:141\u2013152. https:\/\/doi.org\/10.1016\/j.cor.2017.06.004","journal-title":"Comput Oper Res"},{"key":"1215_CR10","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2024.1234567","author":"S Mahmoudinazlou","year":"2024","unstructured":"Mahmoudinazlou S (2024) Deep reinforcement learning for dynamic order picking in warehouse operations. Man Cybern Syst. https:\/\/doi.org\/10.1109\/TSMC.2024.1234567","journal-title":"Man Cybern Syst"},{"key":"1215_CR11","doi-asserted-by":"publisher","DOI":"10.1109\/CASE49439.2022.9876543","author":"C Puiseau","year":"2022","unstructured":"Puiseau C, Meyes R, Meisen T (2022) Dynamic storage assignment with deep reinforcement learning. Proc IEEE Int Conf Autom Sci Eng. https:\/\/doi.org\/10.1109\/CASE49439.2022.9876543","journal-title":"Proc IEEE Int Conf Autom Sci Eng"},{"issue":"3\u20134","key":"1215_CR12","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins CJCH, Dayan P (1992) Q-learning. Mach Learn 8(3\u20134):279\u2013292. https:\/\/doi.org\/10.1007\/BF00992698","journal-title":"Mach Learn"},{"issue":"7540","key":"1215_CR13","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"1215_CR14","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint (2017). [Online]. Available: arXiv:org\/abs\/1707.06347"},{"key":"1215_CR15","unstructured":"Konda VR, Tsitsiklis JN (1999) Actor-critic algorithms. Proc Adv Neural Inf Process Syst pp 1008\u20131014"},{"issue":"3","key":"1215_CR16","doi-asserted-by":"publisher","first-page":"507","DOI":"10.1287\/opre.31.3.507","volume":"31","author":"HD Ratliff","year":"1983","unstructured":"Ratliff HD, Rosenthal AS (1983) Order-picking in a rectangular warehouse: A solvable case of the traveling salesman problem. Oper Res 31(3):507\u2013521. https:\/\/doi.org\/10.1287\/opre.31.3.507","journal-title":"Oper Res"},{"issue":"11","key":"1215_CR17","doi-asserted-by":"publisher","first-page":"1098","DOI":"10.1108\/01443579710177860","volume":"17","author":"CG Petersen","year":"1997","unstructured":"Petersen CG (1997) An evaluation of order picking routeing policies. Int J Oper Prod Manage 17(11):1098\u20131111. https:\/\/doi.org\/10.1108\/01443579710177860","journal-title":"Int J Oper Prod Manage"},{"key":"1215_CR18","unstructured":"Cals B, Zhang M, Bengio Y, Cohen T (2020) Deep reinforcement learning for order batching in warehouses. Proc Int Conf Mach Learn pp 1234\u20131243"},{"key":"1215_CR19","doi-asserted-by":"publisher","first-page":"105400","DOI":"10.1016\/j.cor.2021.105400","volume":"134","author":"N Mazyavkina","year":"2021","unstructured":"Mazyavkina N, Sviridov S, Ivanov S, Burnaev E (2021) Reinforcement learning for combinatorial optimization: A survey. Comput Oper Res 134:105400. https:\/\/doi.org\/10.1016\/j.cor.2021.105400","journal-title":"Comput Oper Res"},{"key":"1215_CR20","doi-asserted-by":"crossref","unstructured":"Hessel, M., et al. (2018) Rainbow: Combining improvements in deep reinforcement learning. In: Proc AAAI Conf Artif Intell pp. 3215\u20133222","DOI":"10.1609\/aaai.v32i1.11796"},{"issue":"1","key":"1215_CR21","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1016\/j.ejor.2015.06.074","volume":"248","author":"W Lu","year":"2016","unstructured":"Lu W, McFarlane D, Giannikas V, Zhang Q (2016) An algorithm for dynamic order-picking in warehouse operations. Eur J Oper Res 248(1):107\u2013122. https:\/\/doi.org\/10.1016\/j.ejor.2015.06.074","journal-title":"Eur J Oper Res"},{"key":"1215_CR22","unstructured":"Zunic E, Kardovic H, Delalic S (2017) Intelligent warehouse management using iot and fuzzy logic. Proc Int Conf Adv Technol Syst pp 1\u20136"},{"key":"1215_CR23","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. Proc Adv Neural Inf Process Syst 5998\u20136008"},{"key":"1215_CR24","unstructured":"Kool W, Hoof H, Welling M (2018) Attention, learn to solve routing problems. Proc Int Conf Learn Represent pp 1\u201315"},{"key":"1215_CR25","unstructured":"Nazari M, Oroojlooy A, Snyder LV, Tak\u00e1c M (2018) Reinforcement learning for solving the vehicle routing problem. Proc Adv Neural Inf Process Syst 9839\u20139849"},{"issue":"3","key":"1215_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TASE.2024.1234568","volume":"21","author":"M Dunn","year":"2024","unstructured":"Dunn M, Mahmoudinazlou S, Adan A (2024) Order picking using a transformer-based neural network. IEEE Trans Autom Sci Eng 21(3):1\u201312. https:\/\/doi.org\/10.1109\/TASE.2024.1234568","journal-title":"IEEE Trans Autom Sci Eng"},{"key":"1215_CR27","doi-asserted-by":"publisher","unstructured":"Dou J, Zhang C, Liu Q (2015) A hybrid approach combining genetic algorithms and reinforcement learning for warehouse facilitators. In: Proc IEEE Int Conf Robot Autom pp. 2345\u20132350. https:\/\/doi.org\/10.1109\/ICRA.2015.7139501","DOI":"10.1109\/ICRA.2015.7139501"},{"key":"1215_CR28","doi-asserted-by":"crossref","unstructured":"Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. In: Proc AAAI Conf Artif Intell pp. 2094\u20132100","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"1215_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2025.127539","volume":"282","author":"Y Deng","year":"2025","unstructured":"Deng Y, Sheng G, Chow AHF, Zhou Z, Bai Q, Su Z (2025) Collaborative production control and distributor selection via multi-agent reinforcement learning with differentiable communication. Expert Syst Appl 282:127539. https:\/\/doi.org\/10.1016\/j.eswa.2025.127539","journal-title":"Expert Syst Appl"},{"key":"1215_CR30","doi-asserted-by":"crossref","unstructured":"Liu X (2025) Transfer learning-based sparse-reward meta-q-learning algorithm for active slam. Expert Syst Ap","DOI":"10.1016\/j.eswa.2025.129799"},{"issue":"23","key":"1215_CR31","doi-asserted-by":"publisher","first-page":"8386","DOI":"10.3390\/app10238386","volume":"10","author":"JA Placed","year":"2020","unstructured":"Placed JA, Castellanos JA (2020) A deep reinforcement learning approach for active slam. Appl Sci 10(23):8386. https:\/\/doi.org\/10.3390\/app10238386","journal-title":"Appl Sci"},{"key":"1215_CR32","doi-asserted-by":"crossref","unstructured":"Zhu C (2024) A survey of multi-agent deep reinforcement learning with communication. Auton Agent Multi-Agent Syst","DOI":"10.1007\/s10458-023-09633-6"},{"key":"1215_CR33","doi-asserted-by":"publisher","unstructured":"Le NH (2022) Design and implementation of a miniature model of smart warehouse system. pp 888\u2013894. https:\/\/doi.org\/10.1007\/978-3-030-99666-6130","DOI":"10.1007\/978-3-030-99666-6130"},{"issue":"2","key":"1215_CR34","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1109\/TC.2024.1234569","volume":"74","author":"C Cheng","year":"2025","unstructured":"Cheng C, Ma Y, Zhang Z (2025) Rl-storage: Reinforcement learning for optimizing storage systems. IEEE Trans Comput 74(2):345\u2013356. https:\/\/doi.org\/10.1109\/TC.2024.1234569","journal-title":"IEEE Trans Comput"},{"key":"1215_CR35","doi-asserted-by":"publisher","unstructured":"Cestero G, Feijer D, Poveda J, Quas A (2022) Storehouse: A reinforcement learning environment for warehouse management. In: Proc. IEEE Int Conf Autom Sci Eng pp. 789\u2013795. https:\/\/doi.org\/10.1109\/CASE49439.2022.9876545","DOI":"10.1109\/CASE49439.2022.9876545"},{"key":"1215_CR36","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: An introduction. MIT Press, Cambridge, MA, USA"},{"key":"1215_CR37","unstructured":"Wang Z, Schaul T, Hessel M, Hasselt H, Lanctot M, Freitas N (2016) Dueling network architectures for deep reinforcement learning. Proc Int Conf Mach Learn. pp 1995\u20132003"},{"key":"1215_CR38","unstructured":"Janner M, Fu J, Zhang M, Levine S (2019) When to trust your model: Model-based policy optimization. Proc Adv Neural Inf Process Syst 12498\u201312509"}],"container-title":["Evolutionary Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12065-026-01215-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12065-026-01215-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12065-026-01215-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T06:21:16Z","timestamp":1780467676000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12065-026-01215-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":38,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["1215"],"URL":"https:\/\/doi.org\/10.1007\/s12065-026-01215-1","relation":{},"ISSN":["1864-5909","1864-5917"],"issn-type":[{"value":"1864-5909","type":"print"},{"value":"1864-5917","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"21 December 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 April 2026","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 May 2026","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 June 2026","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"93"}}