{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T14:20:17Z","timestamp":1769350817833,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,24]],"date-time":"2024-05-24T00:00:00Z","timestamp":1716508800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"College of Information Science and Technology, Beijing University of Chemical Technology","award":["11170044127"],"award-info":[{"award-number":["11170044127"]}]},{"name":"the Central Universities","award":["ZY2412"],"award-info":[{"award-number":["ZY2412"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,24]]},"DOI":"10.1145\/3674658.3674689","type":"proceedings-article","created":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T22:07:19Z","timestamp":1731967639000},"page":"196-203","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["An Improved Strategy for Blood Glucose Control Using Multi-Step Deep Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-0566-933X","authenticated-orcid":false,"given":"Senquan","family":"Wang","sequence":"first","affiliation":[{"name":"College of Information Science and Technology, Beijing University of Chemical Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5177-5730","authenticated-orcid":false,"given":"Weiwei","family":"Gu","sequence":"additional","affiliation":[{"name":"College of Information Science and Technology, Beijing University of Chemical Technology, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,11,18]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Kanyin\u00a0Liane Ong Lauryn\u00a0K Stafford Susan\u00a0A McLaughlin Edward\u00a0J Boyko Stein\u00a0Emil Vollset Amanda\u00a0E Smith Bronte\u00a0E Dalton Joe Duprey Jessica\u00a0A Cruz Hailey Hagins et\u00a0al. Global regional and national burden of diabetes from 1990 to 2021. The Lancet 402(10397):203\u2013234 2023."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"crossref","unstructured":"Miguel Tejedor Ashenafi\u00a0Zebene Woldaregay and Fred Godtliebsen. Reinforcement learning application in diabetes blood glucose control: A systematic review. Artificial intelligence in medicine 104(10397):101836 2020.","DOI":"10.1016\/j.artmed.2020.101836"},{"key":"e_1_3_3_1_4_2","unstructured":"Intenational\u00a0Diabetes Federation. Idf diabetes atlas tenth. International Diabetes 2021."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Eleni Bekiari Konstantinos Kitsios Hood Thabit Martin Tauschmann Eleni Athanasiadou Thomas Karagiannis Anna-Bettina Haidich Roman Hovorka and Apostolos Tsapas. Artificial pancreas treatment for outpatients with type 1 diabetes: systematic review and meta-analysis. bmj 361 2018.","DOI":"10.1136\/bmj.k1310"},{"key":"e_1_3_3_1_6_2","unstructured":"Ian Fox Joyce Lee Rodica Pop-Busui and Jenna Wiens. Deep reinforcement learning for closed-loop blood glucose control. 
In Finale Doshi-Velez Jim Fackler Ken Jung David Kale Rajesh Ranganath Byron Wallace and Jenna Wiens editors Proceedings of the 5th Machine Learning for Healthcare Conference volume 126 of Proceedings of Machine Learning Research pages 508\u2013536. PMLR PMLR 07\u201308 Aug 2020."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Satish\u00a0K Garg Stuart\u00a0A Weinzimer William\u00a0V Tamborlane Bruce\u00a0A Buckingham Bruce\u00a0W Bode Timothy\u00a0S Bailey Ronald\u00a0L Brazg Jacob Ilany Robert\u00a0H Slover Stacey\u00a0M Anderson et\u00a0al. Glucose outcomes with the in-home use of a hybrid closed-loop insulin delivery system in adolescents and adults with type 1 diabetes. Diabetes technology & therapeutics 19(3):155\u2013163 2017.","DOI":"10.1089\/dia.2016.0421"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Sohaib Mehmood Imran Ahmad Hadeeqa Arif Umm\u00a0E Ammara and Abdul Majeed. Artificial pancreas control strategies used for type 1 diabetes control and treatment: a comprehensive analysis. Applied System Innovation 3(3):31 2020.","DOI":"10.3390\/asi3030031"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Melanie\u00a0K Bothe Luke Dickens Katrin Reichel Arn Tellmann Bj\u00f6rn Ellger Martin Westphal and Ahmed\u00a0A Faisal. The use of reinforcement learning algorithms to meet the challenges of an artificial pancreas. Expert review of medical devices 10(5):661\u2013673 2013.","DOI":"10.1586\/17434440.2013.827515"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Charlotte\u00a0K Boughton and Roman Hovorka. New closed-loop insulin systems. Diabetologia 64:1007\u20131015 2021.","DOI":"10.1007\/s00125-021-05391-w"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Miguel Tejedor Sigurd\u00a0Nordtveit Hjerde Jonas\u00a0Nordhaug Myhre and Fred Godtliebsen. Evaluating deep q-learning algorithms for controlling blood glucose in in silico type 1 diabetes. Diagnostics 13(19):3150 2023.","DOI":"10.3390\/diagnostics13193150"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Benjamin Ribba Sherri Dudal Thierry Lav\u00e9 and Richard\u00a0W Peck. Model-informed artificial intelligence: reinforcement learning for precision dosing. Clinical Pharmacology & Therapeutics 107(4):853\u2013857 2020.","DOI":"10.1002\/cpt.1777"},{"key":"e_1_3_3_1_13_2","unstructured":"Ian Fox and Jenna Wiens. Reinforcement learning for blood glucose control: Challenges and opportunities 2019."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Jonas Nordhaug\u00a0Myhre Miguel Tejedor Ilkka Kalervo\u00a0Launonen Anas El\u00a0Fathi and Fred Godtliebsen. In-silico evaluation of glucose regulation using policy gradient reinforcement learning for patients with type 1 diabetes mellitus. Applied Sciences 10(18):6350 2020.","DOI":"10.3390\/app10186350"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Jinhao Zhu Yinjia Zhang Weixiong Rao Qinpei Zhao Jiangfeng Li and Congrong Wang. Reinforcement learning for diabetes blood glucose control with meal information. In Bioinformatics Research and Applications: 17th International Symposium ISBRA 2021 Shenzhen China November 26\u201328 2021 Proceedings 17 pages 80\u201391. Springer 2021.","DOI":"10.1007\/978-3-030-91415-8_8"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Francesco Di\u00a0Felice Alessandro Borri and Maria\u00a0Domenica Di\u00a0Benedetto. 
Deep reinforcement learning for closed-loop blood glucose control: two approaches. IFAC-PapersOnLine 55(40):115\u2013120 2022.","DOI":"10.1016\/j.ifacol.2023.01.058"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Sumana Basu Marc-Andr\u00e9 Legault Adriana Romero-Soriano and Doina Precup. On the challenges of using reinforcement learning in precision drug dosing: Delay and prolongedness of action effectss 2023.","DOI":"10.1609\/aaai.v37i12.26650"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Kai Arulkumaran Marc\u00a0Peter Deisenroth Miles Brundage and Anil\u00a0Anthony Bharath. Deep reinforcement learning: A brief survey. IEEE Signal Processing Magazine 34(6):26\u201338 2017.","DOI":"10.1109\/MSP.2017.2743240"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei\u00a0A Rusu Joel Veness Marc\u00a0G Bellemare Alex Graves Martin Riedmiller Andreas\u00a0K Fidjeland Georg Ostrovski et\u00a0al. Human-level control through deep reinforcement learning. nature 518(7540):529\u2013533 2015.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Taiyu Zhu Kezhi Li Pau Herrero and Pantelis Georgiou. Basal glucose control in type 1 diabetes using deep reinforcement learning: An in silico validation. IEEE Journal of Biomedical and Health Informatics 25(4):1223\u20131232 2020.","DOI":"10.1109\/JBHI.2020.3014556"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Chiara Dalla\u00a0Man Marc\u00a0D Breton and Claudio Cobelli. Physical activity into the meal glucose\u2014insulin model of type 1 diabetes: In silico studies 2009.","DOI":"10.1177\/193229680900300107"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Chiara\u00a0Dalla Man Francesco Micheletto Dayu Lv Marc Breton Boris Kovatchev and Claudio Cobelli. The uva\/padova type 1 diabetes simulator: new features. Journal of diabetes science and technology 8(1):26\u201334 2014.","DOI":"10.1177\/1932296813514502"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Hado Van\u00a0Hasselt Arthur Guez and David Silver. Deep reinforcement learning with double q-learning. In Proceedings of the AAAI conference on artificial intelligence volume\u00a030 2016.","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"e_1_3_3_1_24_2","unstructured":"Tom Schaul John Quan Ioannis Antonoglou and David Silver. Prioritized experience replay 2016."},{"key":"e_1_3_3_1_25_2","unstructured":"Ziyu Wang Tom Schaul Matteo Hessel Hado Hasselt Marc Lanctot and Nando Freitas. Dueling network architectures for deep reinforcement learning. In International conference on machine learning pages 1995\u20132003. PMLR 2016."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Richard\u00a0S Sutton. Learning to predict by the methods of temporal differences. Machine learning 3:9\u201344 1988.","DOI":"10.1007\/BF00115009"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Richard\u00a0S Sutton and Andrew\u00a0G Barto. Reinforcement learning: An introduction. Robotica 17(2):229\u2013235 1999.","DOI":"10.1017\/S0263574799211174"},{"key":"e_1_3_3_1_28_2","unstructured":"Marc\u00a0G Bellemare Will Dabney and R\u00e9mi Munos. A distributional perspective on reinforcement learning. In Proceedings of the 34th International Conference on Machine Learning volume\u00a070 of ICML\u201917 pages 449\u2013458 Sydney NSW Australia 2017. 
PMLR JMLR.org."},{"key":"e_1_3_3_1_29_2","unstructured":"Meire Fortunato Mohammad\u00a0Gheshlaghi Azar Bilal Piot Jacob Menick Ian Osband Alex Graves Vlad Mnih Remi Munos Demis Hassabis Olivier Pietquin Charles Blundell and Shane Legg. Noisy networks for exploration 2019."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Matteo Hessel Joseph Modayil Hado Van\u00a0Hasselt Tom Schaul Georg Ostrovski Will Dabney Dan Horgan Bilal Piot Mohammad Azar and David Silver. Rainbow: Combining improvements in deep reinforcement learning. In Proceedings of the AAAI conference on artificial intelligence volume\u00a032 2018.","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"e_1_3_3_1_31_2","unstructured":"Aniruddh Raghu Matthieu Komorowski Leo\u00a0Anthony Celi Peter Szolovits and Marzyeh Ghassemi. Continuous state-space models for optimal sepsis treatment: a deep reinforcement learning approach. In Machine Learning for Healthcare Conference pages 147\u2013163. PMLR 2017."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"Daniel Lopez-Martinez Patrick Eschenfeldt Sassan Ostvar Myles Ingram Chin Hur and Rosalind Picard. Deep reinforcement learning for optimal critical care pain management with morphine using dueling double-deep q networks. In 2019 41st Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC) pages 3960\u20133963. IEEE 2019.","DOI":"10.1109\/EMBC.2019.8857295"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"crossref","unstructured":"Harry Emerson Matthew Guy and Ryan McConville. Offline reinforcement learning for safer blood glucose control in people with type 1 diabetes. Journal of Biomedical Informatics 142:104376 2023.","DOI":"10.1016\/j.jbi.2023.104376"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"crossref","unstructured":"Leslie\u00a0Pack Kaelbling Michael\u00a0L Littman and Anthony\u00a0R Cassandra. Planning and acting in partially observable stochastic domains. Artificial intelligence 101(1-2):99\u2013134 1998.","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"crossref","unstructured":"Xuanchen Xiang and Simon Foo. Recent advances in deep reinforcement learning applications for solving partially observable markov decision processes (pomdp) problems: Part 1\u2014fundamentals and applications in games robotics and natural language processing. Machine Learning and Knowledge Extraction 3(3):554\u2013581 2021.","DOI":"10.3390\/make3030029"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"crossref","unstructured":"Lingheng Meng Rob Gorbet and Dana Kuli\u0107. Memory-based deep reinforcement learning for pomdps. In 2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS) pages 5619\u20135626. IEEE 2021.","DOI":"10.1109\/IROS51168.2021.9636140"},{"key":"e_1_3_3_1_37_2","unstructured":"J\u00a0Fernando Hernandez-Garcia and Richard\u00a0S Sutton. Understanding multi-step deep reinforcement learning: A systematic study of the dqn target 2019."},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"crossref","unstructured":"Wei Yuan Yueyuan Li Hanyang Zhuang Chunxiang Wang and Ming Yang. Prioritized experience replay-based deep q learning: Multiple-reward architecture for highway driving decision making. IEEE Robotics & Automation Magazine 28(4):21\u201331 2021.","DOI":"10.1109\/MRA.2021.3115980"},{"key":"e_1_3_3_1_39_2","unstructured":"Volodymyr Mnih Adria\u00a0Puigdomenech Badia Mehdi Mirza Alex Graves Timothy Lillicrap Tim Harley David Silver and Koray Kavukcuoglu. 
Asynchronous methods for deep reinforcement learning. In International conference on machine learning pages 1928\u20131937. PMLR 2016."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"crossref","unstructured":"Jingtao Qin Nanpeng Yu and Yuanqi Gao. Solving unit commitment problems with multi-step deep reinforcement learning. In 2021 IEEE international conference on communications control and computing technologies for smart grids (SmartGridComm) pages 140\u2013145. IEEE 2021.","DOI":"10.1109\/SmartGridComm51999.2021.9632339"},{"key":"e_1_3_3_1_41_2","unstructured":"Jinyu Xie. Simglucose v0.2.1 2018."},{"key":"e_1_3_3_1_42_2","unstructured":"Greg Brockman Vicki Cheung Ludwig Pettersson Jonas Schneider John Schulman Jie Tang and Wojciech Zaremba. Openai gym 2016."}],"event":{"name":"ICBBT 2024: 2024 16th International Conference on Bioinformatics and Biomedical Technology","location":"Chongqing China","acronym":"ICBBT 2024"},"container-title":["Proceedings of the 2024 16th International Conference on Bioinformatics and Biomedical Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3674658.3674689","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3674658.3674689","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:50Z","timestamp":1750294670000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3674658.3674689"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,24]]},"references-count":41,"alternative-id":["10.1145\/3674658.3674689","10.1145\/3674658"],"URL":"https:\/\/doi.org\/10.1145\/3674658.3674689","relation":{},"subject":[],"published":{"date-parts":[[2024,5,24]]},"assertion":[{"value":"2024-11-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
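A record like the one cited above can be retrieved programmatically from the Crossref REST API by DOI. The following Python sketch is a minimal illustration, assuming only the public api.crossref.org endpoint and the third-party requests package (neither appears in the original record); the field names mirror the Crossref "work" message schema shown above.

    # Minimal sketch (assumption: public Crossref REST API, `requests` package).
    import requests

    DOI = "10.1145/3674658.3674689"  # DOI taken from the record above

    resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
    resp.raise_for_status()
    work = resp.json()["message"]  # Crossref wraps the work record in a "message" envelope

    print(work["title"][0])  # article title
    print(", ".join(f'{a["given"]} {a["family"]}' for a in work["author"]))  # authors
    print(work["DOI"], work.get("page", "n/a"))  # DOI and page range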