{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,12]],"date-time":"2026-04-12T06:28:36Z","timestamp":1775975316747,"version":"3.50.1"},"reference-count":107,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T00:00:00Z","timestamp":1634601600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T00:00:00Z","timestamp":1634601600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Manuf"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s10845-021-01847-3","type":"journal-article","created":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T10:13:10Z","timestamp":1634638390000},"page":"905-929","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":119,"title":["Reinforcement learning applications to machine scheduling problems: a comprehensive literature review"],"prefix":"10.1007","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6881-2580","authenticated-orcid":false,"given":"Behice Meltem","family":"Kayhan","sequence":"first","affiliation":[]},{"given":"Gokalp","family":"Yildiz","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,10,19]]},"reference":[{"issue":"4","key":"1847_CR1","doi-asserted-by":"publisher","first-page":"961","DOI":"10.3311\/PPci.14295","volume":"63","author":"G \u00c1brah\u00e1m","year":"2019","unstructured":"\u00c1brah\u00e1m, G., Auer, P., D\u00f3sa, G., Dulai, T., & Werner-Stark, \u00c3. (2019). A reinforcement learning motivated algorithm for process optimization. Periodica Polytechnica Civil Engineering, 63(4), 961\u2013970. https:\/\/doi.org\/10.3311\/PPci.14295","journal-title":"Periodica Polytechnica Civil Engineering"},{"issue":"6","key":"1847_CR2","doi-asserted-by":"publisher","first-page":"2513","DOI":"10.1007\/s10845-011-0580-y","volume":"23","author":"N Aissani","year":"2012","unstructured":"Aissani, N., Bekrar, A., Trentesaux, D., & Beldjilali, B. (2012). Dynamic scheduling for multi-site companies: A decisional approach based on reinforcement multi-agent learning. Journal of Intelligent Manufacturing, 23(6), 2513\u20132529. https:\/\/doi.org\/10.1007\/s10845-011-0580-y","journal-title":"Journal of Intelligent Manufacturing"},{"key":"1847_CR3","doi-asserted-by":"publisher","unstructured":"Aissani, N., Trentesaux, D., & Beldjilali, B. (2009). Multi-agent reinforcement learning for adaptive scheduling: Application to multi-site company. In IFAC proceedings volumes, (Vol. 42, No. 4, pp. 1102\u20131107). https:\/\/doi.org\/10.3182\/20090603-3-RU-2001.0280.","DOI":"10.3182\/20090603-3-RU-2001.0280"},{"key":"1847_CR4","unstructured":"Aissani, N., & Trentesaux, D. (2008). Efficient and effective reactive scheduling of manufacturing system using Sarsa-multi-objective agents. 
In Proceedings of the 7th international conference MOSIM, Paris (pp.\u00a0698\u2013707)."},{"issue":"4","key":"1847_CR5","doi-asserted-by":"publisher","first-page":"1196","DOI":"10.1080\/00207543.2015.1057297","volume":"54","author":"K Arviv","year":"2016","unstructured":"Arviv, K., Stern, H., & Edan, Y. (2016). Collaborative reinforcement learning for a two-robot job transfer flow-shop scheduling problem. International Journal of Production Research, 54(4), 1196\u20131209. https:\/\/doi.org\/10.1080\/00207543.2015.1057297","journal-title":"International Journal of Production Research"},{"issue":"1","key":"1847_CR6","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1504\/EJIE.2013.051594","volume":"7","author":"A Atighehchian","year":"2013","unstructured":"Atighehchian, A., & Sepehri, M. M. (2013). An environment-driven, function-based approach to dynamic single-machine scheduling. European Journal of Industrial Engineering, 7(1), 100\u2013118. https:\/\/doi.org\/10.1504\/EJIE.2013.051594","journal-title":"European Journal of Industrial Engineering"},{"issue":"2","key":"1847_CR7","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1016\/S0921-8890(00)00087-7","volume":"33","author":"ME Aydin","year":"2000","unstructured":"Aydin, M. E., & \u00d6ztemel, E. (2000). Dynamic job-shop scheduling using reinforcement learning agents. Robotics and Autonomous Systems, 33(2), 169\u2013178. https:\/\/doi.org\/10.1016\/S0921-8890(00)00087-7","journal-title":"Robotics and Autonomous Systems"},{"issue":"1","key":"1847_CR8","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto, A. G., & Mahadevan, S. (2003). Recent advances in hierarchical reinforcement learning. Discrete Event Dynamic Systems, 13(1), 41\u201377. https:\/\/doi.org\/10.1023\/A:1022140919877","journal-title":"Discrete Event Dynamic Systems"},{"issue":"1","key":"1847_CR9","doi-asserted-by":"publisher","first-page":"15890","DOI":"10.1016\/j.ifacol.2017.08.2354","volume":"50","author":"W Bouazza","year":"2017","unstructured":"Bouazza, W., Sallez, Y., & Beldjilali, B. (2017). A distributed approach solving partially flexible job-shop scheduling problem with a Q-learning effect. IFAC-PapersOnLine, 50(1), 15890\u201315895. https:\/\/doi.org\/10.1016\/j.ifacol.2017.08.2354","journal-title":"IFAC-PapersOnLine"},{"issue":"6","key":"1847_CR10","doi-asserted-by":"publisher","first-page":"1531","DOI":"10.1007\/s10845-019-01531-7","volume":"31","author":"JPU Cadavid","year":"2020","unstructured":"Cadavid, J. P. U., Lamouri, S., Grabot, B., Pellerin, R., & Fortin, A. (2020). Machine learning applied in production planning and control: a state-of-the-art in the era of industry 4.0. Journal of Intelligent Manufacturing, 31(6), 1531\u20131558. https:\/\/doi.org\/10.1007\/s10845-019-01531-7","journal-title":"Journal of Intelligent Manufacturing"},{"key":"1847_CR11","doi-asserted-by":"publisher","unstructured":"Cs\u00e1ji, B. C., & Monostori, L. (2005). Stochastic approximate scheduling by neurodynamic learning. In IFAC Proceedings Volumes, (Vol. 38, No. 1, pp. 355\u2013360). https:\/\/doi.org\/10.3182\/20050703-6-CZ-1902.01481","DOI":"10.3182\/20050703-6-CZ-1902.01481"},{"key":"1847_CR12","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1613\/jair.2548","volume":"32","author":"BC Cs\u00e1ji","year":"2008","unstructured":"Cs\u00e1ji, B. C., & Monostori, L. (2008). Adaptive stochastic resource control: A machine learning approach. 
Journal of Artificial Intelligence Research, 32, 453\u2013486. https:\/\/doi.org\/10.1613\/jair.2548","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"3","key":"1847_CR13","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/j.aei.2006.01.001","volume":"20","author":"BC Cs\u00e1ji","year":"2006","unstructured":"Cs\u00e1ji, B. C., Monostori, L., & K\u00e1d\u00e1r, B. (2006). Reinforcement learning in a distributed market-based production control system. Advanced Engineering Informatics, 20(3), 279\u2013288. https:\/\/doi.org\/10.1016\/j.aei.2006.01.001","journal-title":"Advanced Engineering Informatics"},{"issue":"4","key":"1847_CR14","doi-asserted-by":"publisher","first-page":"560","DOI":"10.1287\/mnsc.45.4.560","volume":"45","author":"TK Das","year":"1999","unstructured":"Das, T. K., Gosavi, A., Mahadevan, S., & Marchalleck, N. (1999). Solving semi-Markov decision problems using average reward reinforcement learning. Management Science, 45(4), 560\u2013574. https:\/\/doi.org\/10.1287\/mnsc.45.4.560","journal-title":"Management Science"},{"key":"1847_CR15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68856-3","volume-title":"Logical and relational learning","author":"L De Raedt","year":"2008","unstructured":"De Raedt, L. (2008). Logical and relational learning. New York: Springer. https:\/\/doi.org\/10.1007\/978-3-540-68856-3."},{"key":"1847_CR16","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/978-981-15-4095-0_7","volume-title":"Deep Reinforcement Learning","author":"Z Ding","year":"2020","unstructured":"Ding, Z., & Dong, H. (2020). Challenges of reinforcement learning. In Deep Reinforcement Learning (pp. 249\u2013272). Singapore: Springer. https:\/\/doi.org\/10.1007\/978-981-15-4095-0_7"},{"key":"1847_CR17","unstructured":"Dulac-Arnold, G., Mankowitz, D., & Hester, T. (2019). Challenges of real-world reinforcement learning. (Online) https:\/\/arxiv.org\/abs\/1904.12901"},{"key":"1847_CR18","doi-asserted-by":"publisher","first-page":"425","DOI":"10.1016\/j.jocs.2017.06.004","volume":"25","author":"HY Fuchigami","year":"2018","unstructured":"Fuchigami, H. Y., & Rangel, S. (2018). A survey of case studies in production scheduling: Analysis and perspectives. Journal of Computational Science, 25, 425\u2013436. https:\/\/doi.org\/10.1016\/j.jocs.2017.06.004","journal-title":"Journal of Computational Science"},{"issue":"7","key":"1847_CR19","doi-asserted-by":"publisher","first-page":"072035","DOI":"10.1088\/1742-6596\/1486\/7\/072035","volume":"1486","author":"G Fang","year":"2020","unstructured":"Fang, G., Li, Y., Liu, A., & Liu, Z. (2020). A reinforcement learning method to scheduling problem of steel production process. Journal of Physics: Conference Series,\u00a01486(7), 072035. https:\/\/doi.org\/10.1088\/1742-6596\/1486\/7\/072035","journal-title":"Journal of Physics: Conference Series"},{"key":"1847_CR20","unstructured":"Gabel, T., & Riedmiller, M. (2006a). Reducing policy degradation in neuro-dynamic programming. In ESANN 2006 Proceedings - European Symposium on Artificial Neural Networks (pp.\u00a0653\u2013658)."},{"key":"1847_CR21","doi-asserted-by":"publisher","DOI":"10.1007\/11805816_5","volume-title":"Advances in case-based reasoning. ECCBR 2006","author":"T Gabel","year":"2006","unstructured":"Gabel, T., & Riedmiller, M. (2006b). Multi-agent case-based reasoning for cooperative reinforcement learners. In Roth-Berghofer, T. R., G\u00f6ker, M. H., & G\u00fcvenir, H. A. (Eds.), Advances in case-based reasoning. 
ECCBR 2006 (Vol. 4106). Berlin, Heidelberg: Springer. https:\/\/doi.org\/10.1007\/11805816_5"},{"key":"1847_CR22","doi-asserted-by":"publisher","unstructured":"Gabel, T., & Riedmiller, M. (2007a). On a successful application of multi-agent reinforcement learning to operations research benchmarks. In 2007 IEEE international symposium on approximate dynamic programming and reinforcement learning (pp.\u00a068\u201375). https:\/\/doi.org\/10.1109\/ADPRL.2007.368171","DOI":"10.1109\/ADPRL.2007.368171"},{"key":"1847_CR23","doi-asserted-by":"publisher","unstructured":"Gabel, T., & Riedmiller, M. (2007b). Scaling adaptive agent-based reactive job-shop scheduling to large-scale problems. In Proceedings of the 2007 IEEE symposium on computational intelligence in scheduling, CI-Sched 2007 (pp.\u00a0259\u2013266). https:\/\/doi.org\/10.1109\/SCIS.2007.367699","DOI":"10.1109\/SCIS.2007.367699"},{"issue":"4","key":"1847_CR24","first-page":"14","volume":"24","author":"T Gabel","year":"2008","unstructured":"Gabel, T., & Riedmiller, M. (2008). Adaptive reactive job-shop scheduling with reinforcement learning agents. International Journal of Information Technology and Intelligent Computing, 24(4), 14\u201318","journal-title":"International Journal of Information Technology and Intelligent Computing"},{"issue":"1","key":"1847_CR25","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1080\/00207543.2011.571443","volume":"50","author":"T Gabel","year":"2011","unstructured":"Gabel, T., & Riedmiller, M. (2011). Distributed policy search reinforcement learning for job-shop scheduling tasks. International Journal of Production Research, 50(1), 41\u201361. https:\/\/doi.org\/10.1080\/00207543.2011.571443","journal-title":"International Journal of Production Research"},{"key":"1847_CR26","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-7491-4","volume-title":"Simulation-based optimization","author":"A Gosavi","year":"2015","unstructured":"Gosavi, A. (2015). Simulation-based optimization. Berlin: Springer"},{"key":"1847_CR27","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1016\/S0167-5060(08)70356-X","volume":"5","author":"RL Graham","year":"1979","unstructured":"Graham, R. L., Lawler, E. L., Lenstra, J. K., & Kan, A. H. G. R. (1979). Optimization and approximation in deterministic sequencing and scheduling: A survey. Annals of Discrete Mathematics, 5, 287\u2013326. https:\/\/doi.org\/10.1016\/S0167-5060(08)70356-X","journal-title":"Annals of Discrete Mathematics"},{"key":"1847_CR28","doi-asserted-by":"publisher","unstructured":"Guo, L., Zhuang, Z., Huang, Z., & Qin, W. (2020). Optimization of dynamic multi-objective non-identical parallel machine scheduling with multi-stage reinforcement learning. In 2020 IEEE 16th international conference on automation science and engineering (CASE) (pp.\u00a01215\u20131219). https:\/\/doi.org\/10.1109\/CASE48305.2020.9216743","DOI":"10.1109\/CASE48305.2020.9216743"},{"key":"1847_CR29","doi-asserted-by":"publisher","unstructured":"Han, W., Guo, F., & Su, X. (2019). A reinforcement learning method for a hybrid flow-shop scheduling problem. Algorithms, 12(11), 222. https:\/\/doi.org\/10.3390\/a12110222","DOI":"10.3390\/a12110222"},{"key":"1847_CR30","doi-asserted-by":"publisher","first-page":"106685","DOI":"10.1016\/j.knosys.2020.106685","volume":"214","author":"A Heuillet","year":"2021","unstructured":"Heuillet, A., Couthouis, F., & D\u00edaz-Rodr\u00edguez, N. (2021). Explainability in deep reinforcement learning. Knowledge-Based Systems, 214, 106685. 
https:\/\/doi.org\/10.1016\/j.knosys.2020.106685","journal-title":"Knowledge-Based Systems"},{"issue":"1","key":"1847_CR31","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1023\/B:APIN.0000011143.95085.74","volume":"20","author":"J Hong","year":"2004","unstructured":"Hong, J., & Prabhu, V. V. (2004). Distributed reinforcement learning control for batch sequencing and sizing in just-in-time manufacturing systems. Applied Intelligence, 20(1), 71\u201387. https:\/\/doi.org\/10.1023\/B:APIN.0000011143.95085.74","journal-title":"Applied Intelligence"},{"key":"1847_CR32","doi-asserted-by":"publisher","unstructured":"Idrees, H. D., Sinnokrot, M. O., & Al-Shihabi, S. (2006). A reinforcement learning algorithm to minimize the mean tardiness of a single machine with controlled capacity. In Proceedings - Winter simulation conference (pp.\u00a01765\u20131769). https:\/\/doi.org\/10.1109\/WSC.2006.322953","DOI":"10.1109\/WSC.2006.322953"},{"key":"1847_CR33","doi-asserted-by":"publisher","unstructured":"Iwamura, K., Mayumi, N., Tanimizu, Y., & Sugimura, N. (2010). A study on real-time scheduling for holonic manufacturing systems - Determination of utility values based on multi-agent reinforcement learning. In International conference on industrial applications of holonic and multi-agent systems (pp.\u00a0135\u2013144). Springer, Berlin, Heidelberg. https:\/\/doi.org\/10.1007\/978-3-642-03668-2_13","DOI":"10.1007\/978-3-642-03668-2_13"},{"key":"1847_CR34","doi-asserted-by":"publisher","unstructured":"Jim\u00e9nez, Y. M., Palacio, J. C., & Now\u00e9, A. (2020). Multi-agent reinforcement learning tool for job shop scheduling problems. In International conference on optimization and learning (pp.\u00a03\u201312). https:\/\/doi.org\/10.1007\/978-3-030-41913-4_1","DOI":"10.1007\/978-3-030-41913-4_1"},{"key":"1847_CR35","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L Kaelbling","year":"1996","unstructured":"Kaelbling, L. P., Littman, M. L., & Moore, A. W. (1996). Reinforcement learning: A survey. Journal of Artificial Intelligence Research, 4, 237\u2013285. https:\/\/doi.org\/10.1613\/jair.301","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"2","key":"1847_CR36","doi-asserted-by":"publisher","first-page":"727","DOI":"10.1109\/TITS.2018.2829165","volume":"20","author":"H Khadilkar","year":"2018","unstructured":"Khadilkar, H. (2018). A scalable reinforcement learning algorithm for scheduling railway lines. IEEE Transactions on Intelligent Transportation Systems, 20(2), 727\u2013736. https:\/\/doi.org\/10.1109\/TITS.2018.2829165","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"1847_CR37","doi-asserted-by":"publisher","unstructured":"Kim, G. H., & Lee, C. S. G. (1996). Genetic reinforcement learning for scheduling heterogeneous machines. In Proceedings - IEEE International Conference on Robotics and Automation (Vol.\u00a03, pp.\u00a02798\u20132803). https:\/\/doi.org\/10.1109\/ROBOT.1996.506586","DOI":"10.1109\/ROBOT.1996.506586"},{"key":"1847_CR38","doi-asserted-by":"publisher","unstructured":"Kim, N., & Shin, H. (2017). The application of actor-critic reinforcement learning for fab dispatching scheduling. In 2017 Winter simulation conference (pp.\u00a04570\u20134571). https:\/\/doi.org\/10.1109\/WSC.2017.8248209","DOI":"10.1109\/WSC.2017.8248209"},{"key":"1847_CR39","doi-asserted-by":"publisher","unstructured":"Kong, L. F., & Wu, J. (2005). 
Dynamic single machine scheduling using Q-learning agent. In 2005 International conference on machine learning and cybernetics, ICMLC 2005 (pp.\u00a03237\u20133241). https:\/\/doi.org\/10.1109\/ICMLC.2005.1527501","DOI":"10.1109\/ICMLC.2005.1527501"},{"issue":"20","key":"1847_CR40","doi-asserted-by":"publisher","first-page":"8718","DOI":"10.3390\/su12208718","volume":"12","author":"S Lee","year":"2020","unstructured":"Lee, S., Cho, Y., & Lee, Y. H. (2020). Injection mold production sustainable scheduling using deep reinforcement learning. Sustainability, 12(20), 8718. https:\/\/doi.org\/10.3390\/su12208718","journal-title":"Sustainability"},{"key":"1847_CR41","doi-asserted-by":"publisher","unstructured":"Lihu, A., & Holban, S. (2009). Top five most promising algorithms in scheduling. In Proceedings \u2013 2009 5th international symposium on applied computational intelligence and informatics, SACI 2009 (pp.\u00a0397\u2013404). https:\/\/doi.org\/10.1109\/SACI.2009.5136281","DOI":"10.1109\/SACI.2009.5136281"},{"issue":"7","key":"1847_CR42","doi-asserted-by":"publisher","first-page":"4276","DOI":"10.1109\/TII.2019.2908210","volume":"15","author":"CC Lin","year":"2019","unstructured":"Lin, C. C., Deng, D. J., Chih, Y. L., & Chiu, H. T. (2019). Smart manufacturing scheduling with edge computing using multiclass deep Q network. IEEE Transactions on Industrial Informatics, 15(7), 4276\u20134284. https:\/\/doi.org\/10.1109\/TII.2019.2908210","journal-title":"IEEE Transactions on Industrial Informatics"},{"key":"1847_CR43","doi-asserted-by":"publisher","unstructured":"Liu, C. C., Jin, H. Y., Tian, Y., & Yu, H. B. (2001). Reinforcement learning approach to re-entrant manufacturing system scheduling. In 2001 International Conferences on Info-Tech and Info-Net: A Key to Better Life, ICII 2001 - Proceedings (Vol.\u00a03, pp.\u00a0280\u2013285). https:\/\/doi.org\/10.1109\/ICII.2001.983070","DOI":"10.1109\/ICII.2001.983070"},{"key":"1847_CR44","doi-asserted-by":"publisher","first-page":"71752","DOI":"10.1109\/ACCESS.2020.2987820","volume":"8","author":"CL Liu","year":"2020","unstructured":"Liu, C. L., Chang, C. C., & Tseng, C. J. (2020). Actor-critic deep reinforcement learning for solving job shop scheduling problems. IEEE Access, 8, 71752\u201371762. https:\/\/doi.org\/10.1109\/ACCESS.2020.2987820","journal-title":"IEEE Access"},{"issue":"3","key":"1847_CR45","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/S1874-8651(10)60008-7","volume":"29","author":"W Liu","year":"2009","unstructured":"Liu, W., & Wang, X. (2009). Dynamic decision model in evolutionary games based on reinforcement learning. Systems Engineering - Theory & Practice, 29(3), 28\u201333. https:\/\/doi.org\/10.1016\/S1874-8651(10)60008-7","journal-title":"Systems Engineering - Theory & Practice"},{"key":"1847_CR46","doi-asserted-by":"publisher","first-page":"106208","DOI":"10.1016\/j.asoc.2020.106208","volume":"91","author":"S Luo","year":"2020","unstructured":"Luo, S. (2020). Dynamic scheduling for flexible job shop with new job insertions by deep reinforcement learning. Applied Soft Computing, 91, 106208. https:\/\/doi.org\/10.1016\/j.asoc.2020.106208","journal-title":"Applied Soft Computing"},{"issue":"2","key":"1847_CR47","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1016\/S0969-6016(00)00014-9","volume":"7","author":"K Miyashita","year":"2000","unstructured":"Miyashita, K. (2000). Learning scheduling control knowledge through reinforcements. 
International Transactions in Operational Research, 7(2), 125\u2013138. https:\/\/doi.org\/10.1016\/S0969-6016(00)00014-9","journal-title":"International Transactions in Operational Research"},{"issue":"7540","key":"1847_CR48","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A. A., Veness, J., Bellemare, M. G., \u2026 Hassabis, D. (2015). Human-level control through deep reinforcement learning. Nature, 518(7540), 529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"issue":"1","key":"1847_CR49","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1016\/S0007-8506(07)60462-4","volume":"55","author":"L Monostori","year":"2006","unstructured":"Monostori, L., & Cs\u00e1ji, B. C. (2006). Stochastic dynamic production control by neurodynamic programming. CIRP Annals - Manufacturing Technology, 55(1), 473\u2013478. https:\/\/doi.org\/10.1016\/S0007-8506(07)60462-4","journal-title":"CIRP Annals - Manufacturing Technology"},{"issue":"1","key":"1847_CR50","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1016\/S0007-8506(07)60714-8","volume":"53","author":"L Monostori","year":"2004","unstructured":"Monostori, L., Cs\u00e1ji, B. C., & K\u00e1d\u00e1r, B. (2004). Adaptation and learning in distributed production control. CIRP Annals - Manufacturing Technology, 53(1), 349\u2013352. https:\/\/doi.org\/10.1016\/S0007-8506(07)60714-8","journal-title":"CIRP Annals - Manufacturing Technology"},{"key":"1847_CR51","unstructured":"Nahmias, S., & Olsen, T. L. (2015). Production and operations analysis. Long Grove: Waveland Press"},{"issue":"1","key":"1847_CR52","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1016\/j.engappai.2012.03.011","volume":"26","author":"TRF Neto","year":"2013","unstructured":"Neto, T. R. F., & Godinho Filho, M. (2013). Literature review regarding Ant Colony Optimization applied to scheduling problems: Guidelines for implementation and directions for future research. Engineering Applications of Artificial Intelligence, 26(1), 150\u2013161. https:\/\/doi.org\/10.1016\/j.engappai.2012.03.011","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"1847_CR53","doi-asserted-by":"publisher","unstructured":"Palombarini, J., & Mart\u00ednez, E. (2010). Learning to repair plans and schedules using a relational (deictic) representation. In Computer aided chemical engineering (Vol.\u00a027, pp.\u00a01377\u20131382). Elsevier. https:\/\/doi.org\/10.1016\/s1570-7946(09)70620-0","DOI":"10.1016\/s1570-7946(09)70620-0"},{"key":"1847_CR54","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1016\/j.compchemeng.2012.06.021","volume":"47","author":"J Palombarini","year":"2012","unstructured":"Palombarini, J., & Mart\u00ednez, E. (2012a). SmartGantt \u2013 An interactive system for generating and updating rescheduling knowledge using relational abstractions. Computers and Chemical Engineering, 47, 202\u2013216. https:\/\/doi.org\/10.1016\/j.compchemeng.2012.06.021","journal-title":"Computers and Chemical Engineering"},{"issue":"11","key":"1847_CR55","doi-asserted-by":"publisher","first-page":"10251","DOI":"10.1016\/j.eswa.2012.02.176","volume":"39","author":"J Palombarini","year":"2012","unstructured":"Palombarini, J., & Mart\u00ednez, E. (2012b). SmartGantt \u2013 An intelligent system for real time rescheduling based on relational reinforcement learning. 
Expert Systems With Applications, 39(11), 10251\u201310268. https:\/\/doi.org\/10.1016\/j.eswa.2012.02.176","journal-title":"Expert Systems With Applications"},{"issue":"17","key":"1847_CR56","doi-asserted-by":"publisher","first-page":"5401","DOI":"10.1080\/00207543.2020.1718794","volume":"58","author":"M Parente","year":"2020","unstructured":"Parente, M., Figueira, G., Amorim, P., & Marques, A. (2020). Production scheduling in the context of Industry 4.0: review and trends. International Journal of Production Research, 58(17), 5401\u20135431. https:\/\/doi.org\/10.1080\/00207543.2020.1718794","journal-title":"International Journal of Production Research"},{"issue":"3","key":"1847_CR57","doi-asserted-by":"publisher","first-page":"1420","DOI":"10.1109\/tase.2019.2956762","volume":"17","author":"I Park","year":"2020","unstructured":"Park, I., Huh, J., Kim, J., & Park, J. (2020). A reinforcement learning approach to robust scheduling of semiconductor manufacturing facilities. IEEE Transactions on Automation Science and Engineering, 17(3), 1420\u20131431. https:\/\/doi.org\/10.1109\/tase.2019.2956762","journal-title":"IEEE Transactions on Automation Science and Engineering"},{"issue":"1","key":"1847_CR58","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1023\/A:1007641824604","volume":"33","author":"CD Paternina-Arboleda","year":"2001","unstructured":"Paternina-Arboleda, C. D., & Das, T. K. (2001). Intelligent dynamic control policies for serial production lines. IIE Transactions, 33(1), 65\u201377. https:\/\/doi.org\/10.1023\/A:1007641824604","journal-title":"IIE Transactions"},{"key":"1847_CR59","doi-asserted-by":"publisher","unstructured":"Qu, S., Chu, T., Wang, J., Leckie, J., & Jian, W. (2015). A centralized reinforcement learning approach for proactive scheduling in manufacturing. In IEEE international conference on emerging technologies and factory automation, ETFA (Vol. 2015-October, pp.\u00a01\u20138). https:\/\/doi.org\/10.1109\/ETFA.2015.7301417","DOI":"10.1109\/ETFA.2015.7301417"},{"key":"1847_CR60","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/j.procir.2016.11.011","volume":"57","author":"S Qu","year":"2016","unstructured":"Qu, S., Wang, J., Govil, S., & Leckie, J. O. (2016a). Optimized adaptive scheduling of a manufacturing process system with multi-skill workforce and multiple machine types: An ontology-based, multi-agent reinforcement learning approach. Procedia CIRP, 57, 55\u201360. https:\/\/doi.org\/10.1016\/j.procir.2016.11.011","journal-title":"Procedia CIRP"},{"key":"1847_CR61","doi-asserted-by":"publisher","unstructured":"Qu, S., Wang, J., & Govil, S. (2016b). Learning adaptive dispatching rules for a manufacturing process system by using reinforcement learning approach. In IEEE International Conference on Emerging Technologies and Factory Automation, ETFA (Vol. 2016-November, pp.\u00a01\u20138). https:\/\/doi.org\/10.1109\/etfa.2016.7733712","DOI":"10.1109\/etfa.2016.7733712"},{"key":"1847_CR62","unstructured":"Qu, G., Wierman, A., & Li, N. (2020). Scalable reinforcement learning of localized policies for multi-agent networked systems. In Learning for Dynamics and Control (pp.\u00a0256\u2013266)."},{"key":"1847_CR63","doi-asserted-by":"publisher","unstructured":"Ram\u00edrez-Hern\u00e1ndez, J. A., & Fernandez, E. (2005). A case study in scheduling reentrant manufacturing lines: Optimal and simulation-based approaches. In Proceedings of the 44th IEEE conference on decision and control (Vol.\u00a02005, pp.\u00a02158\u20132163). 
https:\/\/doi.org\/10.1109\/CDC.2005.1582481","DOI":"10.1109\/CDC.2005.1582481"},{"key":"1847_CR64","doi-asserted-by":"publisher","unstructured":"Ram\u00edrez-Hern\u00e1ndez, J. A., & Fernandez, E. (2009). A simulation-based approximate dynamic programming approach for the control of the Intel Mini-Fab benchmark model. In Proceedings - Winter simulation conference (pp.\u00a01634\u20131645). https:\/\/doi.org\/10.1109\/wsc.2009.5429179","DOI":"10.1109\/wsc.2009.5429179"},{"key":"1847_CR65","doi-asserted-by":"publisher","first-page":"157","DOI":"10.2507\/ijsimm19-1-co4","volume":"19","author":"J Ren","year":"2020","unstructured":"Ren, J., Ye, C., & Yang, F. (2020). A novel solution to JSPs based on long short-term memory and policy gradient algorithm. International Journal of Simulation Modelling, 19, 157\u2013168. https:\/\/doi.org\/10.2507\/ijsimm19-1-co4","journal-title":"International Journal of Simulation Modelling"},{"key":"1847_CR66","unstructured":"Reyna, Y. C. F., C\u00e1ceres, A. P., Jim\u00e9nez, Y. M., & Reyes, Y. T. (2019a). An improvement of reinforcement learning approach for permutation of flow-shop scheduling problems. RISTI - Revista Iberica de Sistemas e Tecnologias de Informacao, (E18), pp. 257\u2013270."},{"issue":"1","key":"1847_CR67","first-page":"100","volume":"40","author":"YCF Reyna","year":"2019","unstructured":"Reyna, Y. C. F., Jim\u00e9nez, Y. M., Cabrera, A. V., & S\u00e1nchez, E. A. (2019b). Optimization of heavily constrained hybrid-flexible flowshop problems using a multi-agent reinforcement learning approach. Investigaci\u00f3n Operacional, 40(1), 100\u2013111","journal-title":"Investigaci\u00f3n Operacional"},{"issue":"3","key":"1847_CR68","first-page":"281","volume":"38","author":"YCF Reyna","year":"2018","unstructured":"Reyna, Y. C. F., Jim\u00e9nez, Y. M., & Now\u00e9, A. (2018). Q-learning algorithm performance for m-machine n-jobs flow shop scheduling to minimize makespan. Investigaci\u00f3n Operacional, 38(3), 281\u2013290","journal-title":"Investigaci\u00f3n Operacional"},{"issue":"3","key":"1847_CR69","first-page":"225","volume":"36","author":"YCF Reyna","year":"2015","unstructured":"Reyna, Y. C. F., Jim\u00e9nez, Y. M., Berm\u00fadez Cabrera, J. M., & M\u00e9ndez Hern\u00e1ndez, B. M. (2015). A reinforcement learning approach for scheduling problems. Investigaci\u00f3n Operacional, 36(3), 225\u2013231","journal-title":"Investigaci\u00f3n Operacional"},{"key":"1847_CR70","unstructured":"Riedmiller, S., & Riedmiller, M. (1999). A neural reinforcement learning approach to learn local dispatching policies in production scheduling. In IJCAI international joint conference on artificial intelligence (Vol.\u00a02, pp.\u00a0764\u2013769)."},{"key":"1847_CR71","volume-title":"Artificial intelligence: A modern approach","author":"S Russell","year":"2010","unstructured":"Russell, S., & Norvig, P. (2010). Artificial intelligence: A modern approach. London: Pearson."},{"key":"1847_CR72","doi-asserted-by":"publisher","unstructured":"Schwartz, A. (1993). A reinforcement learning method for maximizing undiscounted rewards. In Proceedings of the tenth international conference on machine learning (pp.\u00a0298\u2013305). https:\/\/doi.org\/10.1016\/b978-1-55860-307-3.50045-9","DOI":"10.1016\/b978-1-55860-307-3.50045-9"},{"issue":"101","key":"1847_CR73","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1016\/j.cie.2018.03.039","volume":"125","author":"Y Shiue","year":"2018","unstructured":"Shiue, Y., Lee, K., & Su, C. (2018). 
Real-time scheduling for a smart factory using a reinforcement learning approach. Computers & Industrial Engineering, 125(101), 604\u2013614. https:\/\/doi.org\/10.1016\/j.cie.2018.03.039","journal-title":"Computers & Industrial Engineering"},{"key":"1847_CR74","doi-asserted-by":"publisher","DOI":"10.1002\/9781118557426","volume-title":"Markov decision processes in artificial intelligence: MDPs, beyond MDPs and applications","author":"O Sigaud","year":"2013","unstructured":"Sigaud, O., & Buffet, O. (2013). Markov decision processes in artificial intelligence: MDPs, beyond MDPs and applications. New York: Wiley"},{"issue":"1","key":"1847_CR75","doi-asserted-by":"publisher","first-page":"511","DOI":"10.1016\/j.cirp.2018.04.041","volume":"67","author":"N Stricker","year":"2018","unstructured":"Stricker, N., Kuhnle, A., Sturm, R., & Friess, S. (2018). Reinforcement learning for adaptive order dispatching in the semiconductor industry. CIRP Annals, 67(1), 511\u2013514. https:\/\/doi.org\/10.1016\/j.cirp.2018.04.041","journal-title":"CIRP Annals"},{"key":"1847_CR76","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction. Cambridge: MIT Press"},{"issue":"1","key":"1847_CR77","doi-asserted-by":"publisher","first-page":"1","DOI":"10.2200\/S00268ED1V01Y201005AIM009","volume":"4","author":"C Szepesv\u00e1ri","year":"2010","unstructured":"Szepesv\u00e1ri, C. (2010). Algorithms for reinforcement learning. Synthesis Lectures on Artificial Intelligence and Machine Learning, 4(1), 1\u2013103. https:\/\/doi.org\/10.2200\/S00268ED1V01Y201005AIM009","journal-title":"Synthesis Lectures on Artificial Intelligence and Machine Learning"},{"key":"1847_CR78","doi-asserted-by":"publisher","unstructured":"Thomas, T. E., Koo, J., Chaterji, S., & Bagchi, S. (2018). Minerva: A reinforcement learning-based technique for optimal scheduling and bottleneck detection in distributed factory operations. In 2018 10th international conference on communication systems & networks (COMSNETS) (pp.\u00a0129\u2013136). https:\/\/doi.org\/10.1109\/COMSNETS.2018.8328189","DOI":"10.1109\/COMSNETS.2018.8328189"},{"key":"1847_CR79","unstructured":"Van Otterlo, M. (2009). The logic of adaptive behavior: Knowledge representation and algorithms for adaptive sequential decision making under uncertainty in first-order and relational domains. IOS Press"},{"key":"1847_CR80","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/978-1-4757-3264-1_6","volume-title":"The nature of statistical learning theory","author":"VN Vapnik","year":"2000","unstructured":"Vapnik, V. N. (2000). Methods of pattern recognition. In The nature of statistical learning theory (pp. 123\u2013180). New York, NY: Springer"},{"key":"1847_CR81","doi-asserted-by":"publisher","unstructured":"Wang, H. X., & Yan, H. S. (2013a). An adaptive scheduling system in knowledgeable manufacturing based on multi-agent. In 10th IEEE international conference on control and automation (ICCA) (pp.\u00a0496\u2013501). https:\/\/doi.org\/10.1109\/icca.2013.6564866","DOI":"10.1109\/icca.2013.6564866"},{"key":"1847_CR82","doi-asserted-by":"publisher","first-page":"2347","DOI":"10.4028\/www.scientific.net\/AMM.433-435.2347","volume":"433\u2013435","author":"HX Wang","year":"2013","unstructured":"Wang, H. X., & Yan, H. S. (2013b). An adaptive assembly scheduling approach in knowledgeable manufacturing. 
Applied Mechanics and Materials, 433\u2013435, 2347\u20132350. https:\/\/doi.org\/10.4028\/www.scientific.net\/AMM.433-435.2347","journal-title":"Applied Mechanics and Materials"},{"issue":"5","key":"1847_CR83","doi-asserted-by":"publisher","first-page":"1085","DOI":"10.1007\/s10845-014-0936-1","volume":"27","author":"HX Wang","year":"2016","unstructured":"Wang, H. X., & Yan, H. S. (2016). An interoperable adaptive scheduling strategy for knowledgeable manufacturing based on SMGWQ-learning. Journal of Intelligent Manufacturing, 27(5), 1085\u20131095. https:\/\/doi.org\/10.1007\/s10845-014-0936-1","journal-title":"Journal of Intelligent Manufacturing"},{"key":"1847_CR84","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2020.1794075","author":"H Wang","year":"2020","unstructured":"Wang, H. X., Sarker, B. R., Li, J., & Li, J. (2020). Adaptive scheduling for assembly job shop with uncertain assembly times based on dual Q-learning. International Journal of Production Research. https:\/\/doi.org\/10.1080\/00207543.2020.1794075","journal-title":"International Journal of Production Research"},{"issue":"6","key":"1847_CR85","doi-asserted-by":"publisher","first-page":"553","DOI":"10.1016\/j.rcim.2004.07.003","volume":"20","author":"YC Wang","year":"2004","unstructured":"Wang, Y. C., & Usher, J. M. (2004). Learning policies for single machine job dispatching. Robotics and Computer-Integrated Manufacturing, 20(6), 553\u2013562. https:\/\/doi.org\/10.1016\/j.rcim.2004.07.003","journal-title":"Robotics and Computer-Integrated Manufacturing"},{"issue":"1","key":"1847_CR86","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1016\/j.engappai.2004.08.018","volume":"18","author":"YC Wang","year":"2005","unstructured":"Wang, Y. C., & Usher, J. M. (2005). Application of reinforcement learning for agent-based production scheduling. Engineering Applications of Artificial Intelligence, 18(1), 73\u201382. https:\/\/doi.org\/10.1016\/j.engappai.2004.08.018","journal-title":"Engineering Applications of Artificial Intelligence"},{"issue":"3\u20134","key":"1847_CR87","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1007\/s00170-006-0465-y","volume":"33","author":"YC Wang","year":"2007","unstructured":"Wang, Y. C., & Usher, J. M. (2007). A reinforcement learning approach for developing routing policies in multi-agent production scheduling. International Journal of Advanced Manufacturing Technology, 33(3\u20134), 323\u2013333. https:\/\/doi.org\/10.1007\/s00170-006-0465-y","journal-title":"International Journal of Advanced Manufacturing Technology"},{"issue":"2","key":"1847_CR88","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1007\/s10845-018-1454-3","volume":"31","author":"YF Wang","year":"2018","unstructured":"Wang, Y. F. (2018). Adaptive job shop scheduling strategy based on weighted Q-learning algorithm. Journal of Intelligent Manufacturing, 31(2), 417\u2013432. https:\/\/doi.org\/10.1007\/s10845-018-1454-3","journal-title":"Journal of Intelligent Manufacturing"},{"key":"1847_CR89","doi-asserted-by":"publisher","first-page":"1264","DOI":"10.1016\/j.procir.2018.03.212","volume":"72","author":"B Waschneck","year":"2018","unstructured":"Waschneck, B., Reichstaller, A., Belzner, L., Altenm\u00fcller, T., Bauernhansl, T., Knapp, A., & Kyek, A. (2018a). Optimization of global production scheduling with deep reinforcement learning. Procedia CIRP, 72, 1264\u20131269. 
https:\/\/doi.org\/10.1016\/j.procir.2018.03.212","journal-title":"Procedia CIRP"},{"key":"1847_CR90","doi-asserted-by":"publisher","unstructured":"Waschneck, B., Reichstaller, A., Belzner, L., Altenm\u00fcller, T., Bauernhansl, T., Knapp, A., & Kyek, A. (2018b). Deep reinforcement learning for semiconductor production scheduling. In 2018 29th annual SEMI advanced semiconductor manufacturing conference, ASMC 2018 (pp.\u00a0301\u2013306). https:\/\/doi.org\/10.1109\/asmc.2018.8373191","DOI":"10.1109\/asmc.2018.8373191"},{"key":"1847_CR91","doi-asserted-by":"publisher","unstructured":"Wei, Y., & Zhao, M. (2004). Composite rules selection using reinforcement learning for dynamic job-shop scheduling. In 2004 IEEE conference on robotics, automation and mechatronics (Vol.\u00a02, pp.\u00a01083\u20131088). https:\/\/doi.org\/10.1109\/RAMECH.2004.1438070","DOI":"10.1109\/RAMECH.2004.1438070"},{"issue":"12","key":"1847_CR92","doi-asserted-by":"publisher","first-page":"4704","DOI":"10.1016\/j.asoc.2013.07.015","volume":"13","author":"AS Xanthopoulos","year":"2013","unstructured":"Xanthopoulos, A. S., Koulouriotis, D. E., Tourassis, V. D., & Emiris, D. M. (2013). Intelligent controllers for bi-objective dynamic scheduling on a single machine with sequence-dependent setups. Applied Soft Computing Journal, 13(12), 4704\u20134717. https:\/\/doi.org\/10.1016\/j.asoc.2013.07.015","journal-title":"Applied Soft Computing Journal"},{"key":"1847_CR93","doi-asserted-by":"publisher","unstructured":"Xiao, Y., Tan, Q., Zhou, L., & Tang, H. (2017). Stochastic scheduling with compatible job families by an improved Q-learning algorithm. In Chinese Control Conference, CCC (pp.\u00a02657\u20132662). https:\/\/doi.org\/10.23919\/ChiCC.2017.8027764","DOI":"10.23919\/ChiCC.2017.8027764"},{"issue":"3\u20134","key":"1847_CR94","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1007\/s00170-008-1588-0","volume":"42","author":"HB Yang","year":"2009","unstructured":"Yang, H. B., & Yan, H. S. (2009). An adaptive approach to dynamic scheduling in knowledgeable manufacturing cell. International Journal of Advanced Manufacturing Technology, 42(3\u20134), 312\u2013320. https:\/\/doi.org\/10.1007\/s00170-008-1588-0","journal-title":"International Journal of Advanced Manufacturing Technology"},{"key":"1847_CR95","doi-asserted-by":"publisher","unstructured":"Yang, H. B., & Yan, H. S. (2007). An adaptive policy of dynamic scheduling in knowledgeable manufacturing environment. In Proceedings of the IEEE international conference on automation and logistics, ICAL 2007 (pp.\u00a0835\u2013840). https:\/\/doi.org\/10.1109\/ICAL.2007.4338680","DOI":"10.1109\/ICAL.2007.4338680"},{"key":"1847_CR96","doi-asserted-by":"publisher","unstructured":"Wei, Y., Jiang, X., & Hao, P. (2009). Pattern driven dynamic scheduling approach using reinforcement learning. In 2009 IEEE international conference on automation and logistics (pp.\u00a0514\u2013519). https:\/\/doi.org\/10.1109\/ICAL.2009.5262867","DOI":"10.1109\/ICAL.2009.5262867"},{"issue":"2","key":"1847_CR97","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1504\/IJSOI.2016.080083","volume":"8","author":"B Yuan","year":"2016","unstructured":"Yuan, B., Jiang, Z., & Wang, L. (2016). Dynamic parallel machine scheduling with random breakdowns using the learning agent. International Journal of Services Operations and Informatics, 8(2), 94\u2013103. 
https:\/\/doi.org\/10.1504\/IJSOI.2016.080083","journal-title":"International Journal of Services Operations and Informatics"},{"key":"1847_CR98","doi-asserted-by":"publisher","unstructured":"Yuan, B., Wang, L., & Jiang, Z. (2013). Dynamic parallel machine scheduling using the learning agent. In 2013 IEEE international conference on industrial engineering and engineering management (pp.\u00a01565\u20131569). https:\/\/doi.org\/10.1109\/IEEM.2013.6962673","DOI":"10.1109\/IEEM.2013.6962673"},{"key":"1847_CR99","doi-asserted-by":"publisher","unstructured":"Zhang, T., Xie, S., & Rose, O. (2017). Real-time job shop scheduling based on simulation and Markov decision processes. In Proceedings - Winter simulation conference (pp.\u00a03899\u20133907). https:\/\/doi.org\/10.1109\/WSC.2017.8248100","DOI":"10.1109\/WSC.2017.8248100"},{"key":"1847_CR100","doi-asserted-by":"publisher","unstructured":"Zhang, T., Xie, S., & Rose, O. (2018). Real-time batching in job shops based on simulation and reinforcement learning. In 2018 Winter simulation conference (WSC) (pp.\u00a03331\u20133339). https:\/\/doi.org\/10.1109\/WSC.2018.8632524","DOI":"10.1109\/WSC.2018.8632524"},{"key":"1847_CR101","unstructured":"Zhang, W., & Dietterich, T. G. (1995). A reinforcement learning approach to job-shop scheduling. In 1995 International joint conference on artificial intelligence (pp.\u00a01114\u20131120)."},{"key":"1847_CR102","first-page":"1024","volume":"8","author":"W Zhang","year":"1996","unstructured":"Zhang, W., & Dietterich, T. G. (1996). High-performance job-shop scheduling with a time-delay TD(\u03bb) network. Advances in Neural Information Processing Systems, 8, 1024\u20131030","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"2","key":"1847_CR103","doi-asserted-by":"publisher","first-page":"446","DOI":"10.1016\/j.ejor.2011.05.052","volume":"215","author":"Z Zhang","year":"2011","unstructured":"Zhang, Z., Zheng, L., Hou, F., & Li, N. (2011). Semiconductor final test scheduling with Sarsa(\u03bb, k) algorithm. European Journal of Operational Research, 215(2), 446\u2013458. https:\/\/doi.org\/10.1016\/j.ejor.2011.05.052","journal-title":"European Journal of Operational Research"},{"issue":"7","key":"1847_CR104","doi-asserted-by":"publisher","first-page":"1315","DOI":"10.1016\/j.cor.2011.07.019","volume":"39","author":"Z Zhang","year":"2012","unstructured":"Zhang, Z., Zheng, L., Li, N., Wang, W., Zhong, S., & Hu, K. (2012). Minimizing mean weighted tardiness in unrelated parallel machine scheduling with reinforcement learning. Computers and Operations Research, 39(7), 1315\u20131324. https:\/\/doi.org\/10.1016\/j.cor.2011.07.019","journal-title":"Computers and Operations Research"},{"issue":"9\u201310","key":"1847_CR105","doi-asserted-by":"publisher","first-page":"968","DOI":"10.1007\/s00170-006-0662-8","volume":"34","author":"Z Zhang","year":"2007","unstructured":"Zhang, Z., Zheng, L., & Weng, M. X. (2007). Dynamic parallel machine scheduling with mean weighted tardiness objective by Q-learning. International Journal of Advanced Manufacturing Technology, 34(9\u201310), 968\u2013980. https:\/\/doi.org\/10.1007\/s00170-006-0662-8","journal-title":"International Journal of Advanced Manufacturing Technology"},{"key":"1847_CR106","doi-asserted-by":"publisher","unstructured":"Zhao, M., Li, X., Gao, L., Wang, L., & Xiao, M. (2019). An improved Q-learning based rescheduling method for flexible job-shops with machine failures. 
In 2019 IEEE 15th international conference on automation science and engineering (CASE) (pp.\u00a0331\u2013337). https:\/\/doi.org\/10.1109\/COASE.2019.8843100","DOI":"10.1109\/COASE.2019.8843100"},{"key":"1847_CR107","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1016\/j.procir.2020.05.163","volume":"93","author":"L Zhou","year":"2020","unstructured":"Zhou, L., Zhang, L., & Horn, B. K. P. (2020). Deep reinforcement learning-based dynamic scheduling in smart manufacturing. Procedia CIRP, 93, 383\u2013388. https:\/\/doi.org\/10.1016\/j.procir.2020.05.163","journal-title":"Procedia CIRP"}],"container-title":["Journal of Intelligent Manufacturing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10845-021-01847-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10845-021-01847-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10845-021-01847-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,13]],"date-time":"2023-02-13T22:38:52Z","timestamp":1676327932000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10845-021-01847-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,19]]},"references-count":107,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["1847"],"URL":"https:\/\/doi.org\/10.1007\/s10845-021-01847-3","relation":{},"ISSN":["0956-5515","1572-8145"],"issn-type":[{"value":"0956-5515","type":"print"},{"value":"1572-8145","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,10,19]]},"assertion":[{"value":"2 December 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 September 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 October 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}
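
A minimal sketch, not part of the Crossref record above: the record is a standard Crossref REST API "work" message, so it can be re-fetched and its reference list walked using only the Python standard library. The endpoint https://api.crossref.org/works/{DOI} is the public Crossref API; the variable names and the subset of fields read here are illustrative choices, mirroring the fields visible in the record ("title", "reference-count", "reference").

import json
import urllib.request

DOI = "10.1007/s10845-021-01847-3"  # DOI of the work shown above
url = f"https://api.crossref.org/works/{DOI}"

with urllib.request.urlopen(url) as resp:
    # The "message" object corresponds to the record reproduced above.
    work = json.load(resp)["message"]

print(work["title"][0])                    # article title (Crossref stores titles as a list)
print("references:", work["reference-count"])
for ref in work.get("reference", [])[:5]:
    # Each reference carries a citation key plus an optional DOI and/or unstructured text.
    print(ref["key"], "->", ref.get("DOI") or ref.get("unstructured", "")[:80])

The same fields could be read directly from the JSON above without any network call; fetching by DOI is shown only because the record's own "URL" and "DOI" fields point at that canonical source.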