{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T12:41:10Z","timestamp":1768653670329,"version":"3.49.0"},"reference-count":26,"publisher":"Tech Science Press","issue":"1","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.062980","type":"journal-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T02:40:46Z","timestamp":1745289646000},"page":"919-936","source":"Crossref","is-referenced-by-count":1,"title":["Reinforcement Learning for Solving the Knapsack Problem"],"prefix":"10.32604","volume":"84","author":[{"given":"Zhenfu","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Haiyan","family":"Yin","sequence":"additional","affiliation":[]},{"given":"Liudong","family":"Zuo","sequence":"additional","affiliation":[]},{"given":"Pan","family":"Lai","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","doi-asserted-by":"crossref","first-page":"427","DOI":"10.1007\/s13042-014-0272-y","article-title":"Solving 0-1 knapsack problem using cohort intelligence algorithm","volume":"7","author":"Kulkarni","year":"2016","journal-title":"Int J Mach Learn Cybern"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"1424","DOI":"10.1016\/j.ejor.2022.06.029","article-title":"The polynomial robust knapsack problem","volume":"305","author":"Baldo","year":"2023","journal-title":"Eur J Oper Res"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"378","DOI":"10.1016\/j.matcom.2023.12.033","article-title":"Modeling and solving of knapsack problem with setup based on evolutionary algorithm","volume":"219","author":"He","year":"2024","journal-title":"Math Comput Simul"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/S0377-2217(03)00274-1","article-title":"The multidimensional 0\u20131 knapsack problem: an overview","volume":"155","author":"Fr\u00e9ville","year":"2004","journal-title":"Eur J Oper Res"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"LeCun","year":"2015","journal-title":"Nature"},{"key":"ref6","author":"Sutton","year":"2018","journal-title":"Reinforcement learning: an introduction"},{"key":"ref7","first-page":"111","author":"Ernst","year":"2024","journal-title":"Introduction to reinforcement learning"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of Go with deep neural networks and tree search","volume":"529","author":"Silver","year":"2016","journal-title":"Nature"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1007\/s11023-007-9079-x","article-title":"Universal intelligence: a definition of machine intelligence","volume":"17","author":"Legg","year":"2007","journal-title":"Mines Mach"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"e6509","DOI":"10.1002\/cpe.6509","article-title":"Reinforcement learning using fully connected, attention, and transformer models in knapsack problem solving","volume":"34","author":"Yildiz","year":"2022","journal-title":"Concurr Comput"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1016\/S0304-0208(08)73237-7","author":"Martello","year":"1987","journal-title":"North-holland mathematics studies"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"1827","DOI":"10.1016\/j.dam.2010.04.006","article-title":"On two-stage stochastic knapsack problems","volume":"159","author":"Kosuch","year":"2011","journal-title":"Discrete Appl Math"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1023\/A:1009642405419","article-title":"A genetic algorithm for the multidimensional knapsack problem","volume":"4","author":"Chu","year":"1998","journal-title":"J Heuristics"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1145\/321864.321873","article-title":"Approximate algorithms for the 0\/1 knapsack problem","volume":"22","author":"Sahni","year":"1975","journal-title":"J ACM"},{"key":"ref16","unstructured":"Bello I, Pham H, Le QV, Norouzi M, Bengio S. Neural combinatorial optimization with reinforcement learning. arXiv:1611.09940. 2016."},{"key":"ref17","series-title":"Advances in Neural Information Processing Systems","article-title":"Reinforcement learning for solving the vehicle routing problem","author":"Nazari","year":"2018 Dec 3\u20138"},{"key":"ref18","series-title":"Advances in Neural Information Processing Systems","article-title":"Learning combinatorial optimization algorithms over graphs","author":"Dai","year":"2017 Dec 4\u20139"},{"key":"ref19","series-title":"Proceedings of the 37th International Conference on Machine Learning","article-title":"Stabilizing transformers for reinforcement learning","author":"Parisotto","year":"2020 Jul 13\u201318"},{"key":"ref20","series-title":"2018 Tenth International Conference on Advanced Computational Intelligence (ICACI)","article-title":"A pointer network based deep learning algorithm for 0\u20131 Knapsack Problem","author":"Gu","year":"2018 Mar 29\u201331"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1016\/j.eswa.2018.09.004","article-title":"Neuroevolution for solving multiobjective knapsack problems","volume":"116","author":"Denysiuk","year":"2019","journal-title":"Expert Syst Appl"},{"key":"ref22","series-title":"International Conference on Learning Representations","article-title":"Noisy networks for exploration","author":"Fortunato","year":"2018 Apr 30\u2013May 3"},{"key":"ref23","series-title":"International Conference on Machine Learning","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","author":"Wang","year":"2016 Jun 19\u20134"},{"key":"ref24","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1007\/978-3-030-60990-0_12","author":"Zhang","year":"2021","journal-title":"Handbook of reinforcement learning and control"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"13344","DOI":"10.1109\/TPAMI.2023.3292075","article-title":"Transfer learning in deep reinforcement learning: a survey","volume":"45","author":"Zhu","year":"2023","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref26","first-page":"12445","article-title":"Combining reinforcement learning with Lin-kernighan-helsgaun algorithm for the traveling salesman problem","volume":"35","author":"Zheng","year":"2021","journal-title":"Proc AAAI Conf Artif Intell"}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-84-1\/TSP_CMC_62980\/TSP_CMC_62980.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T01:36:15Z","timestamp":1763343375000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v84n1\/61726"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":26,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.062980","relation":{},"ISSN":["1546-2226"],"issn-type":[{"value":"1546-2226","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}