{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T08:47:40Z","timestamp":1758444460780,"version":"3.44.0"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2024,9,21]],"date-time":"2024-09-21T00:00:00Z","timestamp":1726876800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,21]],"date-time":"2024-09-21T00:00:00Z","timestamp":1726876800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Manuf"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s10845-024-02492-2","type":"journal-article","created":{"date-parts":[[2024,9,21]],"date-time":"2024-09-21T12:01:57Z","timestamp":1726920117000},"page":"4997-5013","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Dynamic confidence-based constraint adjustment in distributional constrained policy optimization: enhancing supply chain management through adaptive reinforcement learning"],"prefix":"10.1007","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-5684-2751","authenticated-orcid":false,"given":"Youness","family":"Boutyour","sequence":"first","affiliation":[]},{"given":"Abdellah","family":"Idrissi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,21]]},"reference":[{"key":"2492_CR1","unstructured":"Achiam, J., Held, D., Tamar, A., & Abbeel, P. (2017). Constrained policy optimization. In International conference on machine learning (Vol. 1, pp. 30\u201347)."},{"key":"2492_CR2","doi-asserted-by":"publisher","DOI":"10.1201\/9781315140223","volume-title":"Constrained Markov decision processes","author":"E Altman","year":"2021","unstructured":"Altman, E. (2021). Constrained Markov decision processes. Routledge."},{"key":"2492_CR3","first-page":"19944","volume":"35","author":"K Asadi","year":"2022","unstructured":"Asadi, K., Fakoor, R., Gottesman, O., Kim, T., Littman, M., & Smola, A. J. (2022). Faster deep reinforcement learning with slower online network. Advances in Neural Information Processing Systems, 35, 19944\u201319955.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2492_CR4","unstructured":"Bellemare, M. G., Dabney, W., & Munos, R. (2017). A distributional perspective on reinforcement learning. In 34th International conference on machine learning, ICML 2017 (Vol. 1, pp. 693\u2013711)."},{"key":"2492_CR5","doi-asserted-by":"publisher","unstructured":"Berm\u00fadez, J. S., del Rio Chanona, A., & Tsay, C. (2023). Distributional constrained reinforcement learning for supply chain optimization. In A. C. Kokossis, M. C. Georgiadis, & E. Pistikopoulos (Eds.), 33rd European symposium on computer aided process engineering. Computer aided chemical engineering (Vol. 52, pp. 1649\u20131654. https:\/\/doi.org\/10.1016\/B978-0-443-15274-0.50262-6","DOI":"10.1016\/B978-0-443-15274-0.50262-6"},{"key":"2492_CR6","unstructured":"Bertsekas, D (2012) Dynamic Programming and Optimal Control: Volume I vol. 4,"},{"key":"2492_CR7","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2024.3415550","author":"Y Boutyour","year":"2024","unstructured":"Boutyour, Y., & Idrissi, A. (2024). Adaptive decentralized policies with attention for large-scale multi-agent environments. IEEE Transactions on Artificial Intelligence. https:\/\/doi.org\/10.1109\/TAI.2024.3415550","journal-title":"IEEE Transactions on Artificial Intelligence"},{"key":"2492_CR8","doi-asserted-by":"publisher","unstructured":"Boutyour, Y., & Idrissi, A. (2023). Deep reinforcement learning in financial markets context: Review and open challenges. In Modern artificial intelligence and data science: Tools, techniques and systems (pp. 49\u201366). Springer. https:\/\/doi.org\/10.1007\/978-3-031-33309-5_5","DOI":"10.1007\/978-3-031-33309-5_5"},{"issue":"2","key":"2492_CR9","first-page":"68","volume":"22","author":"Y Boutyour","year":"2024","unstructured":"Boutyour, Y., & Idrissi, A. (2024). A deep reinforcement learning approach to dynamic airline ticket pricing and customer response analysis. International Journal of Artificial Intelligence, 22(2), 68\u201391.","journal-title":"International Journal of Artificial Intelligence"},{"key":"2492_CR10","unstructured":"Dabney, W., Ostrovski, G., Silver, D., & Munos, R. (2018). Implicit quantile networks for distributional reinforcement learning. In 35th International conference on machine learning, ICML 2018 (Vol. 3, pp. 1774\u20131787)."},{"key":"2492_CR11","doi-asserted-by":"publisher","unstructured":"Dabney, W., Rowland, M., Bellemare, M., & Munos, R. (2018). Distributional reinforcement learning with quantile regression. In Proceedings of the AAAI conference on artificial intelligence (Vol. 32(1)). https:\/\/doi.org\/10.1609\/aaai.v32i1.11791","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"2492_CR12","unstructured":"Dalal, G., Dvijotham, K., Vecerik, M., Hester, T., Paduraru, C., & Tassa, Y. (2018). Safe exploration in continuous action spaces. arXiv preprint. arXiv:1801.08757"},{"issue":"5","key":"2492_CR13","doi-asserted-by":"publisher","first-page":"8561","DOI":"10.3934\/mbe.2023376","volume":"20","author":"V Djordjevic","year":"2023","unstructured":"Djordjevic, V., Tao, H., Song, X., He, S., Gao, W., & Stojanovic, V. (2023). Data-driven control of hydraulic servo actuator: An event-triggered adaptive dynamic programming approach. Mathematical Biosciences and Engineering, 20(5), 8561\u20138582. https:\/\/doi.org\/10.3934\/mbe.2023376","journal-title":"Mathematical Biosciences and Engineering"},{"key":"2492_CR14","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-4095-0","volume-title":"Deep reinforcement learning","author":"H Dong","year":"2020","unstructured":"Dong, H., Dong, H., Ding, Z., Zhang, S., & Chang, T. (2020). Deep reinforcement learning. Springer."},{"key":"2492_CR15","first-page":"1437","volume":"16","author":"J Garc\u00eda","year":"2015","unstructured":"Garc\u00eda, J., & Fern\u00e1ndez, F. (2015). A comprehensive survey on safe reinforcement learning. Journal of Machine Learning Research, 16, 1437\u20131480.","journal-title":"Journal of Machine Learning Research"},{"key":"2492_CR16","unstructured":"Hubbs, C. D., Perez, H. D., Sarwar, O., Sahinidis, N. V., Grossmann, I. E., & Wassick, J. M. (2020). OR-GYM: a reinforcement learning library for operations research problems. arXiv preprint. arXiv:2008.06319"},{"key":"2492_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.compchemeng.2020.106994","volume":"141","author":"TJ Ikonen","year":"2020","unstructured":"Ikonen, T. J., Heljanko, K., & Harjunkoski, I. (2020). Reinforcement learning of adaptive online rescheduling timing and computing time allocation. Computers and Chemical Engineering, 141, 106994. https:\/\/doi.org\/10.1016\/j.compchemeng.2020.106994","journal-title":"Computers and Chemical Engineering"},{"key":"2492_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2021.105042","volume":"121","author":"Y Jiang","year":"2022","unstructured":"Jiang, Y., Gao, W., Na, J., Zhang, D., H\u00e4m\u00e4l\u00e4inen, T. T., Stojanovic, V., & Lewis, F. L. (2022). Value iteration and adaptive optimal output regulation with assured convergence rate. Control Engineering Practice, 121, 105042. https:\/\/doi.org\/10.1016\/j.conengprac.2021.105042","journal-title":"Control Engineering Practice"},{"key":"2492_CR19","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-024-02337-y","author":"H Khosravi","year":"2024","unstructured":"Khosravi, H., Olajire, T., Raihan, A. S., & Ahmed, I. (2024). A data driven sequential learning framework to accelerate and optimize multi-objective manufacturing decisions. Journal of Intelligent Manufacturing. https:\/\/doi.org\/10.1007\/s10845-024-02337-y","journal-title":"Journal of Intelligent Manufacturing"},{"key":"2492_CR20","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1016\/j.cirpj.2022.11.003","volume":"40","author":"C Li","year":"2023","unstructured":"Li, C., Zheng, P., Yin, Y., Wang, B., & Wang, L. (2023). Deep reinforcement learning in smart manufacturing: A review and prospects. CIRP Journal of Manufacturing Science and Technology, 40, 75\u2013101. https:\/\/doi.org\/10.1016\/j.cirpj.2022.11.003","journal-title":"CIRP Journal of Manufacturing Science and Technology"},{"issue":"5","key":"2492_CR21","doi-asserted-by":"publisher","first-page":"2235","DOI":"10.1007\/s10845-023-02159-4","volume":"35","author":"W Li","year":"2023","unstructured":"Li, W., Hu, Y., Zhou, Y., & Pham, D. T. (2023). Safe human\u2013robot collaboration for industrial settings: A survey. Journal of Intelligent Manufacturing, 35(5), 2235\u20132261. https:\/\/doi.org\/10.1007\/s10845-023-02159-4","journal-title":"Journal of Intelligent Manufacturing"},{"key":"2492_CR22","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-024-02371-w","author":"S Liu","year":"2024","unstructured":"Liu, S., Shi, Z., Lin, J., & Yu, H. (2024). A generalisable tool path planning strategy for free-form sheet metal stamping through deep reinforcement and supervised learning. Journal of Intelligent Manufacturing. https:\/\/doi.org\/10.1007\/s10845-024-02371-w","journal-title":"Journal of Intelligent Manufacturing"},{"key":"2492_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Y., Ding, J., & Liu, X.: IPO (2020) Interior-point Policy Optimization Under Constraints. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 4940\u20134947","DOI":"10.1609\/aaai.v34i04.5932"},{"key":"2492_CR24","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-023-02258-2","author":"A Loffredo","year":"2023","unstructured":"Loffredo, A., May, M. C., Matta, A., & Lanza, G. (2023). Reinforcement learning for sustainability enhancement of production lines. Journal of Intelligent Manufacturing. https:\/\/doi.org\/10.1007\/s10845-023-02258-2","journal-title":"Journal of Intelligent Manufacturing"},{"key":"2492_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.107600","volume":"235","author":"F Lu","year":"2022","unstructured":"Lu, F., Yan, T., Bi, H., Feng, M., Wang, S., & Huang, M. (2022). A bilevel whale optimization algorithm for risk management scheduling of information technology projects considering outsourcing. Knowledge-Based Systems, 235, 107600. https:\/\/doi.org\/10.1016\/j.knosys.2021.107600","journal-title":"Knowledge-Based Systems"},{"key":"2492_CR26","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-024-02472-6","author":"MC May","year":"2024","unstructured":"May, M. C., Oberst, J., & Lanza, G. (2024). Managing product-inherent constraints with artificial intelligence: Production control for time constraints in semiconductor manufacturing. Journal of Intelligent Manufacturing. https:\/\/doi.org\/10.1007\/s10845-024-02472-6","journal-title":"Journal of Intelligent Manufacturing"},{"key":"2492_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.107403","author":"N Mohamadi","year":"2024","unstructured":"Mohamadi, N., Niaki, S. T. A., Taher, M., & Shavandi, A. (2024). An application of deep reinforcement learning and vendor-managed inventory in perishable supply chain management. Engineering Applications of Artificial Intelligence. https:\/\/doi.org\/10.1016\/j.engappai.2023.107403","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"2492_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.compchemeng.2020.106886","volume":"139","author":"R Nian","year":"2020","unstructured":"Nian, R., Liu, J., & Huang, B. (2020). A review on reinforcement learning: Introduction and applications in industrial process control. Computers and Chemical Engineering, 139, 106886. https:\/\/doi.org\/10.1016\/j.compchemeng.2020.106886","journal-title":"Computers and Chemical Engineering"},{"issue":"13","key":"2492_CR29","doi-asserted-by":"publisher","first-page":"4316","DOI":"10.1080\/00207543.2021.1973138","volume":"60","author":"M Panzer","year":"2021","unstructured":"Panzer, M., & Bender, B. (2021). Deep reinforcement learning in production systems: A systematic literature review. International Journal of Production Research, 60(13), 4316\u20134341. https:\/\/doi.org\/10.1080\/00207543.2021.1973138","journal-title":"International Journal of Production Research"},{"issue":"1","key":"2492_CR30","doi-asserted-by":"publisher","first-page":"102","DOI":"10.3390\/pr9010102","volume":"9","author":"HD Perez","year":"2021","unstructured":"Perez, H. D., Hubbs, C. D., Li, C., & Grossmann, I. E. (2021). Algorithmic approaches to inventory management optimization. Processes, 9(1), 102. https:\/\/doi.org\/10.3390\/pr9010102","journal-title":"Processes"},{"key":"2492_CR31","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1016\/j.jprocont.2022.01.003","volume":"111","author":"P Petsagkourakis","year":"2022","unstructured":"Petsagkourakis, P., Sandoval, I. O., Bradford, E., Galvanin, F., Zhang, D., & Rio-Chanona, E. A. (2022). Chance constrained policy optimization for process control and optimization. Journal of Process Control, 111, 35\u201345. https:\/\/doi.org\/10.1016\/j.jprocont.2022.01.003","journal-title":"Journal of Process Control"},{"key":"2492_CR32","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., & Moritz, P. (2015). Trust region policy optimization. In F. Bach & D. Blei (Eds.), Proceedings of the 32nd international conference on machine learning research (PMLR) (Vol. 37, pp. 1889\u20131897). Lille. https:\/\/proceedings.mlr.press\/v37\/schulman15.html"},{"key":"2492_CR33","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1016\/j.compchemeng.2019.05.029","volume":"127","author":"J Shin","year":"2019","unstructured":"Shin, J., Badgwell, T. A., Liu, K.-H., & Lee, J. H. (2019). Reinforcement learning\u2014overview of recent progress and implications for process control. Computers and Chemical Engineering, 127, 282\u2013294. https:\/\/doi.org\/10.1016\/j.compchemeng.2019.05.029","journal-title":"Computers and Chemical Engineering"},{"key":"2492_CR34","doi-asserted-by":"publisher","DOI":"10.1007\/s42979-020-00326-5","author":"C Shyalika","year":"2020","unstructured":"Shyalika, C., Silva, T., & Karunananda, A. (2020). Reinforcement learning in dynamic task scheduling: A review. SN Computer Science. https:\/\/doi.org\/10.1007\/s42979-020-00326-5","journal-title":"SN Computer Science"},{"key":"2492_CR35","unstructured":"Sootla, A., Cowen-Rivers, A.I., Jafferjee, T., Wang, Z., Mguni, D.H., Wang, J., Ammar, H.: Saute RL: Almost Surely Safe Reinforcement Learning Using State Augmentation. In: Proceedings of the 39th International Conference on Machine Learning, vol. 162, pp. 20423\u201320443 (2022). https:\/\/proceedings.mlr.press\/v162\/sootla22a.html"},{"issue":"3","key":"2492_CR36","doi-asserted-by":"publisher","first-page":"181","DOI":"10.3934\/mmc.2023016","volume":"3","author":"V Stojanovic","year":"2023","unstructured":"Stojanovic, V. (2023). Fault-tolerant control of a hydraulic servo actuator via adaptive dynamic programming. Mathematical Modelling and Control, 3(3), 181\u2013191. https:\/\/doi.org\/10.3934\/mmc.2023016","journal-title":"Mathematical Modelling and Control"},{"key":"2492_CR37","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R. S., & Barto, A. G. (2018). Reinforcement learning: An introduction. Stanford: Stanford University."},{"key":"2492_CR38","unstructured":"Sutton, R. S., McAllester, D., Singh, S., & Mansour, Y. (1999). Policy gradient methods for reinforcement learning with function approximation. Advances in Neural Information Processing Systems, 12, 1057\u20131063."},{"key":"2492_CR39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0","volume-title":"Handbook of reinforcement learning and control","author":"KG Vamvoudakis","year":"2021","unstructured":"Vamvoudakis, K. G., Wan, Y., Lewis, F. L., & Cansever, D. (2021). Handbook of reinforcement learning and control. Springer."},{"key":"2492_CR40","doi-asserted-by":"publisher","unstructured":"Wang, J., Zhang, Q., Zhao, D., & Chen, Y. (2019). Lane change decision-making through deep reinforcement learning with rule-based constraints. In 2019 International joint conference on neural networks (IJCNN) (Vol. 2019, July 2019. https:\/\/doi.org\/10.1109\/ijcnn.2019.8852110","DOI":"10.1109\/ijcnn.2019.8852110"},{"key":"2492_CR41","doi-asserted-by":"publisher","unstructured":"Wang, S., Li, J., Jiao, Q., & Ma, F. (2024). Design patterns of deep reinforcement learning models for job shop scheduling problems. Journal of Intelligent Manufacturing. https:\/\/doi.org\/10.1007\/s10845-024-02454-8","DOI":"10.1007\/s10845-024-02454-8"},{"issue":"5","key":"2492_CR42","doi-asserted-by":"publisher","first-page":"6567","DOI":"10.1007\/s11227-021-04127-2","volume":"78","author":"H Wen","year":"2021","unstructured":"Wen, H., Wang, S. X., Lu, F. Q., Feng, M., Wang, L. Z., Xiong, J. K., & Si, M. C. (2021). Colony search optimization algorithm using global optimization. The Journal of Supercomputing, 78(5), 6567\u20136611. https:\/\/doi.org\/10.1007\/s11227-021-04127-2","journal-title":"The Journal of Supercomputing"}],"container-title":["Journal of Intelligent Manufacturing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10845-024-02492-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10845-024-02492-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10845-024-02492-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T08:06:13Z","timestamp":1758355573000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10845-024-02492-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,21]]},"references-count":42,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["2492"],"URL":"https:\/\/doi.org\/10.1007\/s10845-024-02492-2","relation":{},"ISSN":["0956-5515","1572-8145"],"issn-type":[{"type":"print","value":"0956-5515"},{"type":"electronic","value":"1572-8145"}],"subject":[],"published":{"date-parts":[[2024,9,21]]},"assertion":[{"value":"3 February 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 September 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}