{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T11:21:46Z","timestamp":1780053706559,"version":"3.54.0"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2020,9,24]],"date-time":"2020-09-24T00:00:00Z","timestamp":1600905600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,9,24]],"date-time":"2020-09-24T00:00:00Z","timestamp":1600905600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"published-print":{"date-parts":[[2020,11]]},"DOI":"10.1007\/s42979-020-00326-5","type":"journal-article","created":{"date-parts":[[2020,9,24]],"date-time":"2020-09-24T13:03:26Z","timestamp":1600952606000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":113,"title":["Reinforcement Learning in Dynamic Task Scheduling: A Review"],"prefix":"10.1007","volume":"1","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5320-5566","authenticated-orcid":false,"given":"Chathurangi","family":"Shyalika","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Thushari","family":"Silva","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Asoka","family":"Karunananda","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,9,24]]},"reference":[{"issue":"3","key":"326_CR1","doi-asserted-by":"publisher","first-page":"362","DOI":"10.17775\/CSEEJPES.2018.00520","volume":"4","author":"D Zhang","year":"2018","unstructured":"Zhang D, Han X, Deng C. Review on the research and practice of deep learning and reinforcement learning in smart grids. CSEE J Power Energy Syst. 2018;4(3):362\u201370. https:\/\/doi.org\/10.17775\/CSEEJPES.2018.00520.","journal-title":"CSEE J Power Energy Syst"},{"issue":"3","key":"326_CR2","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1049\/iet-cim.2018.0009","volume":"1","author":"J Xie","year":"2019","unstructured":"Xie J, Gao L, Peng K, Li X, Li H. Review on flexible job shop scheduling. IET Collab Intell Manuf. 2019;1(3):67\u201377. https:\/\/doi.org\/10.1049\/iet-cim.2018.0009.","journal-title":"IET Collab Intell Manuf"},{"key":"326_CR3","doi-asserted-by":"publisher","first-page":"106208","DOI":"10.1016\/j.asoc.2020.106208","volume":"91","author":"S Luo","year":"2020","unstructured":"Luo S. Dynamic scheduling for flexible job shop with new job insertions by deep reinforcement learning. Appl Soft Comput. 2020;91:106208. https:\/\/doi.org\/10.1016\/j.asoc.2020.106208.","journal-title":"Appl Soft Comput"},{"issue":"3","key":"326_CR4","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.dcan.2016.06.004","volume":"2","author":"MD Nashid Anjum","year":"2016","unstructured":"Nashid Anjum MD, Wang H. Dynamic scheduling and analysis of real time systems with multiprocessors. Digit Commun Netw. 2016;2(3):130\u20138. https:\/\/doi.org\/10.1016\/j.dcan.2016.06.004.","journal-title":"Digit Commun Netw"},{"issue":"6","key":"326_CR5","doi-asserted-by":"crossref","first-page":"16","DOI":"10.14311\/490","volume":"43","author":"T Hagras","year":"2003","unstructured":"Hagras T, Jane\u010dek J. Static vs. dynamic list-scheduling performance comparison. Acta Polytechn. 2003;43(6):16\u201321.","journal-title":"Acta Polytechn."},{"key":"326_CR6","unstructured":"Kopetz H, Real-time scheduling. In: Real-time systems. The International series in engineering and computer science. vol 395. Springer: Boston; 2002."},{"issue":"1","key":"326_CR7","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1016\/j.datak.2010.09.002","volume":"70","author":"Z Huang","year":"2011","unstructured":"Huang Z, van der Aalst WMP, Lu X, Duan H. Reinforcement learning based resource allocation in business process management. Data Knowl Eng. 2011;70(1):127\u201345.","journal-title":"Data Knowl Eng"},{"key":"326_CR8","unstructured":"Kumar V, Bhambri S, Shambharkar PG. Multiple resource management and burst time prediction using deep reinforcement learning. In: Eighth International Conference on advances in computing, communication and information technology CCIT, 2019, pp. 51\u201358."},{"key":"326_CR9","doi-asserted-by":"publisher","unstructured":"Xiao Z, Ma S, Zhang S. Learning task allocation for multiple flows in multi-agent systems. In: 2009 International Conference on communication software and networks, Chengdu Sichuan, China; 2009, pp. 153\u2013157. https:\/\/doi.org\/10.1109\/ICCSN.2009.28.","DOI":"10.1109\/ICCSN.2009.28"},{"key":"326_CR10","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1016\/j.ast.2019.06.024","volume":"92","author":"X Zhao","year":"2019","unstructured":"Zhao X, Zong Q, Tian B, Zhang B, You M. Fast task allocation for heterogeneous unmanned aerial vehicles through reinforcement learning. Aerosp Sci Technol. 2019;92:588\u201394. https:\/\/doi.org\/10.1016\/j.ast.2019.06.024.","journal-title":"Aerosp Sci Technol"},{"key":"326_CR11","doi-asserted-by":"publisher","unstructured":"Nguyen H, La H. Review of Deep Reinforcement Learning for Robot Manipulation. In: 2019 Third IEEE International Conference on robotic computing (IRC). Naples, Italy; 2019, pp. 590\u2013595. https:\/\/doi.org\/10.1109\/IRC.2019.00120.","DOI":"10.1109\/IRC.2019.00120"},{"key":"326_CR12","doi-asserted-by":"publisher","unstructured":"Hou J, Li H, Hu J, Zhao C, Guo Y, Li S et al. A review of the applications and hotspots of reinforcement learning. In: 2017 IEEE International Conference on unmanned systems (ICUS). Beijing, China; 2017, pp. 506\u2013511. https:\/\/doi.org\/10.1109\/ICUS.2017.8278398.","DOI":"10.1109\/ICUS.2017.8278398"},{"key":"326_CR13","doi-asserted-by":"publisher","unstructured":"Zhang L, Qi Q, Wang J, Sun H, Liao J. Multi-task deep reinforcement learning for scalable parallel task scheduling. In: 2019 IEEE International Conference on big data (Big Data). Los Angeles, CA, USA: IEEE; 2019, pp. 2992\u20133001. https:\/\/doi.org\/10.1109\/BigData47090.2019.9006027.","DOI":"10.1109\/BigData47090.2019.9006027"},{"key":"326_CR14","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG. Reinforcement learning: an introduction. 2nd ed. Cambridge: The MIT Press; 2018.","edition":"2"},{"key":"326_CR15","doi-asserted-by":"publisher","unstructured":"Tian Y-T, Yang M, Qi X-Y, Yang Y-M. Multi-robot task allocation for fire-disaster response based on reinforcement learning. In: 2009 International Conference on machine learning and cybernetics. IEEE; 2009, pp. 2312\u20132317. https:\/\/doi.org\/10.1109\/ICMLC.2009.5212216.","DOI":"10.1109\/ICMLC.2009.5212216"},{"issue":"2","key":"326_CR16","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1049\/iet-its.2009.0070","volume":"4","author":"I Arel","year":"2010","unstructured":"Arel I, Liu C, Urbanik T, Kohls AG. Reinforcement learning-based multi-agent system for network traffic signal control. IET Intell Transport Syst. 2010;4(2):128. https:\/\/doi.org\/10.1049\/iet-its.2009.0070.","journal-title":"IET Intell Transport Syst"},{"issue":"1","key":"326_CR17","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1016\/j.engappai.2004.08.018","volume":"18","author":"Y-C Wang","year":"2005","unstructured":"Wang Y-C, Usher JM. Application of reinforcement learning for agent-based production scheduling. Eng Appl Artif Intell. 2005;18(1):73\u201382. https:\/\/doi.org\/10.1016\/j.engappai.2004.08.018.","journal-title":"Eng Appl Artif Intell"},{"issue":"1","key":"326_CR18","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1186\/s13673-019-0187-4","volume":"9","author":"Y Sun","year":"2019","unstructured":"Sun Y, Tan W. A trust-aware task allocation method using deep q-learning for uncertain mobile crowdsourcing. Hum Cent Comput Inf Sci. 2019;9(1):25. https:\/\/doi.org\/10.1186\/s13673-019-0187-4.","journal-title":"Hum Cent Comput Inf Sci"},{"key":"326_CR19","doi-asserted-by":"publisher","unstructured":"Ben Noureddine D, Gharbi A, Ben Ahmed S. Multi-agent Deep Reinforcement Learning for Task Allocation in Dynamic Environment: In: Proceedings of the 12th International Conference on software technologies, Madrid, Spain: SCITEPRESS\u2013Science and Technology Publications; 2017. p. 17-26. https:\/\/doi.org\/10.5220\/0006393400170026.","DOI":"10.5220\/0006393400170026"},{"issue":"5","key":"326_CR20","doi-asserted-by":"publisher","first-page":"7635","DOI":"10.1109\/JIOT.2019.2903191","volume":"6","author":"K Zhang","year":"2019","unstructured":"Zhang K, Zhu Y, Leng S, He Y, Maharjan S, Zhang Y. Deep learning empowered task offloading for mobile edge computing in urban informatics. IEEE Internet Things J. 2019;6(5):7635\u201347. https:\/\/doi.org\/10.1109\/JIOT.2019.2903191.","journal-title":"IEEE Internet Things J"},{"key":"326_CR21","doi-asserted-by":"publisher","unstructured":"Chantaravarapan S, Gunal A, Williams EJ. On Using Monte Carlo Methods for Scheduling. In: Proceedings of the 2004 Winter Simulation Conference, 2004. Washington, D.C.: IEEE; 2004, pp. 789\u2013794. https:\/\/doi.org\/10.1109\/WSC.2004.1371542.","DOI":"10.1109\/WSC.2004.1371542"},{"key":"326_CR22","unstructured":"Zhang W, Dietterich TG. A Reinforcement Learning Approach to Job-shop Scheduling. In: Proceedings of the 14th International Joint Conference on artificial intelligence (IJCAI-95). Morgan Kaufmann, Orlando, FL; 1995, pp. 1114\u20131120."},{"key":"326_CR23","unstructured":"Zhang W. Reinforcement learning for job-shop scheduling [Doctor of Philosophy in Computer Science]. Oregon State University; 1996."},{"key":"326_CR24","unstructured":"Lowe R, Wu Y, Tamar A, Harb J, Abbeel P, Mordatch I. Multi-agent actor-critic for mixed cooperative-competitive environments. arXiv:170602275 [cs] [Internet]. 2017 Jun 7; Available from: http:\/\/arxiv.org\/abs\/1706.02275."},{"issue":"1","key":"326_CR25","doi-asserted-by":"publisher","first-page":"680","DOI":"10.1109\/TWC.2017.2769644","volume":"17","author":"Y Wei","year":"2018","unstructured":"Wei Y, Yu FR, Song M, Han Z. User scheduling and resource allocation in HetNets with hybrid energy supply: an actor-critic reinforcement learning approach. IEEE Trans Wirel Commun. 2018;17(1):680\u201392. https:\/\/doi.org\/10.1109\/TWC.2017.2769644.","journal-title":"IEEE Trans Wirel Commun"},{"key":"326_CR26","doi-asserted-by":"publisher","first-page":"71752","DOI":"10.1109\/ACCESS.2020.2987820","volume":"8","author":"C-L Liu","year":"2020","unstructured":"Liu C-L, Chang C-C, Tseng C-J. Actor-critic deep reinforcement learning for solving job shop scheduling problems. IEEE Access. 2020;8:71752\u201362. https:\/\/doi.org\/10.1109\/ACCESS.2020.2987820.","journal-title":"IEEE Access"},{"key":"326_CR27","unstructured":"Kim D, Moon S, Hostallero D, Kang WJ, Lee T, Son K, et al. Learning to schedule communication in multi-agent reinforcement learning. arXiv:190201554 [cs] [Internet]. 2019 Feb 5; Available from: http:\/\/arxiv.org\/abs\/1902.01554."},{"key":"326_CR28","unstructured":"Wiering M. Multi-agent reinforcement learning for traffic light control. In: 17th International Conf. on Machine Learning (ICML). 2000; pp. 1151\u20131158."},{"key":"326_CR29","doi-asserted-by":"publisher","unstructured":"Sutton RS. Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Machine learning proceedings 1990. Elsevier; 1990. pp. 216\u2013224. https:\/\/doi.org\/10.1016\/B978-1-55860-141-3.50030-4.","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"326_CR30","doi-asserted-by":"publisher","unstructured":"Peng B, Li X, Gao J, Liu J, Wong K-F. Deep Dyna-Q: integrating planning for task-completion dialogue policy learning. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Melbourne, Australia: Association for Computational Linguistics; 2018. pp. 2182\u20132192. https:\/\/doi.org\/10.18653\/v1\/P18-1203.","DOI":"10.18653\/v1\/P18-1203"},{"key":"326_CR31","doi-asserted-by":"crossref","unstructured":"Su S, Li X, Gao J, Liu J, Chen Y. Discriminative Deep Dyna-Q: Robust Planning for Dialogue Policy Learning. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing. 2018. https:\/\/arxiv.org\/abs\/1808.09442.","DOI":"10.18653\/v1\/D18-1416"},{"key":"326_CR32","doi-asserted-by":"publisher","unstructured":"Peng J, Williams RJ. Efficient learning and planning within the dyna framework. 7. https:\/\/doi.org\/10.1177\/105971239300100403.","DOI":"10.1177\/105971239300100403"},{"key":"326_CR33","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2935201","author":"J Cui","year":"2019","unstructured":"Cui J, Liu Y, Nallanathan A. Multi-agent reinforcement learning based resource allocation for UAV networks. IEEE Trans Wirel Commun. 2019;. https:\/\/doi.org\/10.1109\/TWC.2019.2935201.","journal-title":"IEEE Trans Wirel Commun"},{"key":"326_CR34","unstructured":"Zheng L, Yang J, Cai H, Zhang W, Wang J, Yu Y. MAgent: a many-agent reinforcement learning platform for artificial collective intelligence. arXiv:171200600 [cs] [Internet]. 2017 Dec 2; Available from: http:\/\/arxiv.org\/abs\/1712.00600."},{"issue":"4","key":"326_CR35","first-page":"30","volume":"24","author":"T Gabel","year":"2008","unstructured":"Gabel T, Riedmiller M. Adaptive reactive job-shop scheduling with reinforcement learning agents. Int J Inf Technol Intell Comput. 2008a;24(4):30.","journal-title":"Int J Inf Technol Intell Comput"},{"issue":"5","key":"326_CR36","doi-asserted-by":"publisher","first-page":"430","DOI":"10.1016\/j.future.2010.10.009","volume":"27","author":"J Wu","year":"2011","unstructured":"Wu J, Xu X, Zhang P, Liu C. A novel multi-agent reinforcement learning approach for job scheduling in Grid computing. Future Gener Comput Syst. 2011;27(5):430\u20139. https:\/\/doi.org\/10.1016\/j.future.2010.10.009.","journal-title":"Future Gener Comput Syst"},{"issue":"1","key":"326_CR37","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1049\/trit.2018.0001","volume":"3","author":"J Wu","year":"2018","unstructured":"Wu J, Xu X. Decentralised grid scheduling approach based on multi-agent reinforcement learning and gossip mechanism. CAAI Trans Intell Technol. 2018;3(1):8\u201317. https:\/\/doi.org\/10.1049\/trit.2018.0001.","journal-title":"CAAI Trans Intell Technol"},{"key":"326_CR38","doi-asserted-by":"publisher","unstructured":"Moradi MA: Centralized reinforcement learning method for multi-agent job scheduling in Grid. In: 6th International Conference on Computer and Knowledge Engineering (ICCKE). Mashhad, Iran: IEEE; 2016. pp. 171\u20136. https:\/\/doi.org\/10.1109\/ICCKE.2016.7802135.","DOI":"10.1109\/ICCKE.2016.7802135"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-020-00326-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-020-00326-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-020-00326-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,9,24]],"date-time":"2021-09-24T09:20:19Z","timestamp":1632475219000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-020-00326-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,24]]},"references-count":38,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2020,11]]}},"alternative-id":["326"],"URL":"https:\/\/doi.org\/10.1007\/s42979-020-00326-5","relation":{},"ISSN":["2662-995X","2661-8907"],"issn-type":[{"value":"2662-995X","type":"print"},{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,9,24]]},"assertion":[{"value":"12 July 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 September 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 September 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical Standards"}},{"value":"The authors declare that there are no conflicts of interest regarding the publication of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of Interest\/Competing Interests"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Code Availability"}}],"article-number":"306"}}