{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T19:25:48Z","timestamp":1768073148444,"version":"3.49.0"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2022,3,22]],"date-time":"2022-03-22T00:00:00Z","timestamp":1647907200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,3,22]],"date-time":"2022-03-22T00:00:00Z","timestamp":1647907200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2022,7]]},"DOI":"10.1007\/s11227-022-04425-3","type":"journal-article","created":{"date-parts":[[2022,3,22]],"date-time":"2022-03-22T15:12:32Z","timestamp":1647961952000},"page":"13653-13679","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Optimization of multi-class 0\/1 knapsack problem on GPUs by improving memory access efficiency"],"prefix":"10.1007","volume":"78","author":[{"given":"En-Ming","family":"Huang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jerry","family":"Chou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,3,22]]},"reference":[{"issue":"3731","key":"4425_CR1","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1126\/science.153.3731.34","volume":"153","author":"R Bellman","year":"1966","unstructured":"Bellman R (1966) Dynamic programming. Science 153(3731):34\u201337. https:\/\/doi.org\/10.1126\/science.153.3731.34","journal-title":"Science"},{"key":"4425_CR2","doi-asserted-by":"publisher","unstructured":"Boukedjar A, Lalami ME, El-Baz D (2012) Parallel branch and bound on a cpu-gpu system. In: 2012 20th Euromicro International Conference on Parallel, Distributed and Network-based Processing, pp. 392\u2013398. https:\/\/doi.org\/10.1109\/PDP.2012.23","DOI":"10.1109\/PDP.2012.23"},{"issue":"1","key":"4425_CR3","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1016\/j.cor.2011.03.014","volume":"39","author":"V Boyer","year":"2012","unstructured":"Boyer V, El Baz D, Elkihel M (2012) Solving knapsack problems on gpu. Comput Op Res 39(1):42\u201347","journal-title":"Comput Op Res"},{"key":"4425_CR4","doi-asserted-by":"publisher","unstructured":"Carneiro T, Muritiba AE, Negreiros M, Lima de Campos GA (2011) A new parallel schema for branch-and-bound algorithms using gpgpu. In: 2011 23rd International Symposium on Computer Architecture and High Performance Computing, pp. 41\u201347. https:\/\/doi.org\/10.1109\/SBAC-PAD.2011.20","DOI":"10.1109\/SBAC-PAD.2011.20"},{"key":"4425_CR5","doi-asserted-by":"publisher","unstructured":"Ding N, Williams S (2019) An instruction roofline model for gpus. In: 2019 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS), pp. 7\u201318. https:\/\/doi.org\/10.1109\/PMBS49563.2019.00007","DOI":"10.1109\/PMBS49563.2019.00007"},{"key":"4425_CR6","volume-title":"Computers and intractability; a guide to the theory of NP-completeness","author":"MR Garey","year":"1990","unstructured":"Garey MR, Johnson DS (1990) Computers and intractability; a guide to the theory of NP-completeness. W. H Freeman & Co., New York"},{"key":"4425_CR7","doi-asserted-by":"publisher","unstructured":"Hajarian M, Shahbahrami A, Hoseini F (2016) A parallel solution for the 0-1 knapsack problem using firefly algorithm. In: 1st Conference on Swarm Intelligence and Evolutionary Computation (CSIEC), pp. 25\u201330. https:\/\/doi.org\/10.1109\/CSIEC.2016.7482134","DOI":"10.1109\/CSIEC.2016.7482134"},{"key":"4425_CR8","unstructured":"HPC Advisory Council: The Top 500 List (2021). https:\/\/www.top500.org\/lists\/top500\/2021\/06\/"},{"key":"4425_CR9","doi-asserted-by":"publisher","unstructured":"Huang S, Xiao S, Feng W (2009) On the energy efficiency of graphics processing units for scientific computing. In: 2009 IEEE International Symposium on Parallel Distributed Processing, pp. 1\u20138. https:\/\/doi.org\/10.1109\/IPDPS.2009.5160980","DOI":"10.1109\/IPDPS.2009.5160980"},{"key":"4425_CR10","doi-asserted-by":"crossref","unstructured":"Kelly T (2005) Generalized knapsack solvers for multi-unit combinatorial auctions: Analysis and application to computational resource allocation. In: P.\u00a0Faratin, J.A. Rodr\u00edguez-Aguilar (eds.) Agent-Mediated Electronic Commerce VI. Theories for and Engineering of Distributed Mechanisms and Systems, pp. 73\u201386. Springer Berlin Heidelberg, Berlin, Heidelberg","DOI":"10.1007\/11575726_6"},{"key":"4425_CR11","doi-asserted-by":"publisher","unstructured":"Konstantinidis E, Cotronis Y (2015) A practical performance model for compute and memory bound gpu kernels. In: 2015 23rd Euromicro International Conference on Parallel, Distributed, and Network-Based Processing, pp. 651\u2013658. https:\/\/doi.org\/10.1109\/PDP.2015.51","DOI":"10.1109\/PDP.2015.51"},{"key":"4425_CR12","doi-asserted-by":"publisher","unstructured":"Kumaraguruparan N, Sivaramakrishnan H, Sapatnekar SS (2012) Residential task scheduling under dynamic pricing using the multiple knapsack method. In: 2012 IEEE PES Innovative Smart Grid Technologies (ISGT), pp. 1\u20136. https:\/\/doi.org\/10.1109\/ISGT.2012.6175656","DOI":"10.1109\/ISGT.2012.6175656"},{"key":"4425_CR13","doi-asserted-by":"publisher","unstructured":"Lalami ME, El-Baz D (2012) Gpu implementation of the branch and bound method for knapsack problems. In: IEEE 26th International Parallel and Distributed Processing Symposium Workshops PhD Forum, pp. 1769\u20131777. https:\/\/doi.org\/10.1109\/IPDPSW.2012.219","DOI":"10.1109\/IPDPSW.2012.219"},{"issue":"4","key":"4425_CR14","doi-asserted-by":"publisher","first-page":"438","DOI":"10.1016\/0743-7315(88)90007-X","volume":"5","author":"J Lee","year":"1988","unstructured":"Lee J, Shragowitz E, Sahni S (1988) A hypercube algorithm for the 0\/1 knapsack problem. J Parallel Distrib Comput 5(4):438\u2013456. https:\/\/doi.org\/10.1016\/0743-7315(88)90007-X","journal-title":"J Parallel Distrib Comput"},{"issue":"3","key":"4425_CR15","doi-asserted-by":"publisher","first-page":"332","DOI":"10.1016\/0743-7315(91)90080-S","volume":"13","author":"J Lin","year":"1991","unstructured":"Lin J, Storer JA (1991) Processor-efficient hypercube algorithms for the knapsack problem. J Parallel Distrib Comput 13(3):332\u2013337. https:\/\/doi.org\/10.1016\/0743-7315(91)90080-S","journal-title":"J Parallel Distrib Comput"},{"issue":"2","key":"4425_CR16","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/s11265-008-0315-2","volume":"57","author":"H Liu","year":"2009","unstructured":"Liu H, Shao Z, Wang M, Du J, Xue CJ, Jia Z (2009) Combining coarse-grained software pipelining with dvs for scheduling real-time periodic dependent tasks on multi-core embedded systems. J Signal Process Syst 57(2):249\u2013262. https:\/\/doi.org\/10.1007\/s11265-008-0315-2","journal-title":"J Signal Process Syst"},{"key":"4425_CR17","unstructured":"National Center for High-performance Computing: TAIWANIA2 (2018). https:\/\/www.nchc.org.tw\/"},{"key":"4425_CR18","doi-asserted-by":"publisher","unstructured":"Nawaz Z, Stefanov T, Bertels K (2009) Efficient hardware generation for dynamic programming problems. In: 2009 International Conference on Field-Programmable Technology, pp. 348\u2013352. https:\/\/doi.org\/10.1109\/FPT.2009.5377618","DOI":"10.1109\/FPT.2009.5377618"},{"key":"4425_CR19","unstructured":"NVIDIA: NVIDIA A100 datasheet (2020). https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/Data-Center\/a100\/pdf\/nvidia-a100-datasheet.pdf"},{"key":"4425_CR20","unstructured":"NVIDIA: Cuda c++ programming guide (2021). https:\/\/docs.nvidia.com\/cuda\/pdf\/CUDA_C_Programming_Guide.pdf"},{"key":"4425_CR21","unstructured":"Oak Ridge National Laboratory: SUMMIT (2018). https:\/\/www.olcf.ornl.gov\/olcf-resources\/compute-systems\/summit\/"},{"key":"4425_CR22","doi-asserted-by":"publisher","unstructured":"O\u2019Connell JF, Mumford CL (2014) An exact dynamic programming based method to solve optimisation problems using gpus. In: Second International Symposium on Computing and Networking, pp. 347\u2013353. https:\/\/doi.org\/10.1109\/CANDAR.2014.27","DOI":"10.1109\/CANDAR.2014.27"},{"key":"4425_CR23","doi-asserted-by":"crossref","unstructured":"Odlyzko AM (1990) The rise and fall of knapsack cryptosystems. In: In Cryptology and Computational Number Theory, pp. 75\u201388. A.M.S","DOI":"10.1090\/psapm\/042\/1095552"},{"key":"4425_CR24","first-page":"139","volume":"4","author":"DE O\u2019Leary","year":"1995","unstructured":"O\u2019Leary DE (1995) Financial planning with 0\u20131 knapsack problems, part i: domination results. Adv Math Program Financ Plan 4:139\u2013150","journal-title":"Adv Math Program Financ Plan"},{"key":"4425_CR25","unstructured":"Pospichal P, Schwarz J, Jaros J (2010) Parallel genetic algorithm solving 0\/1 knapsack problem running on the gpu. In: Proceedings of the 16th International Conference on Soft Computing (MENDEL), pp. 64\u201370"},{"issue":"1","key":"4425_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.ejor.2019.11.033","volume":"287","author":"G Schryen","year":"2020","unstructured":"Schryen G (2020) Parallel computational optimization in operations research: a new integrative framework, literature review and research directions. Eur J Oper Res 287(1):1\u201318. https:\/\/doi.org\/10.1016\/j.ejor.2019.11.033","journal-title":"Eur J Oper Res"},{"key":"4425_CR27","doi-asserted-by":"publisher","unstructured":"Shen J, Shigeoka K, Ino F, Hagihara K (2017) An out-of-core branch and bound method for solving the 0-1 knapsack problem on a gpu. In: International Conference on Algorithms and Architectures for Parallel Processing, pp. 254\u2013267. https:\/\/doi.org\/10.1007\/978-3-319-65482-9_17","DOI":"10.1007\/978-3-319-65482-9_17"},{"issue":"4","key":"4425_CR28","doi-asserted-by":"publisher","first-page":"e4954","DOI":"10.1002\/cpe.4954","volume":"31","author":"J Shen","year":"2019","unstructured":"Shen J, Shigeoka K, Ino F, Hagihara K (2019) Gpu-based branch-and-bound method to solve large 0\u20131 knapsack problems with data-centric strategies. Concurr Comput Pract Exp 31(4):e4954","journal-title":"Concurr Comput Pract Exp"},{"issue":"4","key":"4425_CR29","doi-asserted-by":"publisher","first-page":"83","DOI":"10.4018\/IJGHPC.2018100105","volume":"10","author":"X Sun","year":"2018","unstructured":"Sun X, Wu CC, Chen LR, Lin JY (2018) Using inter-block synchronization to improve the knapsack problem on gpus. Int J Grid High Perform Comput (IJGHPC) 10(4):83\u201398","journal-title":"Int J Grid High Perform Comput (IJGHPC)"},{"key":"4425_CR30","doi-asserted-by":"publisher","unstructured":"Suri B, Bordoloi UD, Eles P (2012) A scalable gpu-based approach to accelerate the multiple-choice knapsack problem. In: Design, Automation Test in Europe Conference Exhibition (DATE), pp. 1126\u20131129. https:\/\/doi.org\/10.1109\/DATE.2012.6176665","DOI":"10.1109\/DATE.2012.6176665"},{"key":"4425_CR31","doi-asserted-by":"publisher","unstructured":"Thant Sin ST (2021) The parallel processing approach to the dynamic programming algorithm of knapsack problem. In: 2021 IEEE Conference of Russian Young Researchers in Electrical and Electronic Engineering (ElConRus), pp. 2252\u20132256. https:\/\/doi.org\/10.1109\/ElConRus51938.2021.9396489","DOI":"10.1109\/ElConRus51938.2021.9396489"},{"key":"4425_CR32","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1007\/BF02243880","volume":"25","author":"P Toth","year":"1980","unstructured":"Toth P (1980) Dynamic programming algorithms for the zero-one knapsack problem. Computing 25:29\u201345","journal-title":"Computing"},{"key":"4425_CR33","unstructured":"Ulm DR, Baker JW (1996) Solving a 2d knapsack problem on an associative computer augmented with a linear network. In: in Proc. of the International Conference on Parallel and Distributed Processing Techniques and Applications, pp. 29\u201332"},{"issue":"12","key":"4425_CR34","doi-asserted-by":"publisher","first-page":"2865","DOI":"10.1109\/TPDS.2020.3004623","volume":"31","author":"Q Wang","year":"2020","unstructured":"Wang Q, Chu X (2020) Gpgpu performance estimation with core and memory frequency scaling. IEEE Trans Parallel Distrib Syst 31(12):2865\u20132881. https:\/\/doi.org\/10.1109\/TPDS.2020.3004623","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"4425_CR35","doi-asserted-by":"publisher","unstructured":"Wen H, Zhang W (2015) Exploring shared memory and cache to improve gpu performance and energy efficiency. In: Sixteenth International Symposium on Quality Electronic Design, pp. 402\u2013405. https:\/\/doi.org\/10.1109\/ISQED.2015.7085459","DOI":"10.1109\/ISQED.2015.7085459"},{"issue":"4","key":"4425_CR36","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams S, Waterman A, Patterson D (2009) Roofline: an insightful visual performance model for multicore architectures. Commun ACM 52(4):65\u201376. https:\/\/doi.org\/10.1145\/1498765.1498785","journal-title":"Commun ACM"},{"key":"4425_CR37","doi-asserted-by":"publisher","unstructured":"Xiao S, Feng Wc (2010) Inter-block gpu communication via fast barrier synchronization. In: 2010 IEEE International Symposium on Parallel Distributed Processing (IPDPS), pp. 1\u201312. https:\/\/doi.org\/10.1109\/IPDPS.2010.5470477","DOI":"10.1109\/IPDPS.2010.5470477"},{"key":"4425_CR38","doi-asserted-by":"publisher","unstructured":"You Y, Zhang Z, Hsieh CJ, Demmel J, Keutzer K (2018) Imagenet training in minutes. In: Proceedings of the 47th International Conference on Parallel Processing, ICPP 2018, pp. 1\u201310. Association for Computing Machinery, New York, NY, USA. https:\/\/doi.org\/10.1145\/3225058.3225069","DOI":"10.1145\/3225058.3225069"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-022-04425-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-022-04425-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-022-04425-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,4]],"date-time":"2022-07-04T14:13:14Z","timestamp":1656943994000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-022-04425-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,3,22]]},"references-count":38,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2022,7]]}},"alternative-id":["4425"],"URL":"https:\/\/doi.org\/10.1007\/s11227-022-04425-3","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,3,22]]},"assertion":[{"value":"22 February 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 March 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}