{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T06:56:07Z","timestamp":1781592967227,"version":"3.54.5"},"reference-count":51,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.engappai.2026.115166","type":"journal-article","created":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T01:17:32Z","timestamp":1779412652000},"page":"115166","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"P1","title":["Dual-agent reinforcement learning network with knowledge injection for multi-objective control of tandem cold rolling"],"prefix":"10.1016","volume":"179","author":[{"given":"Shang","family":"Chen","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiawei","family":"Lei","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yunjian","family":"Hu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wen","family":"Peng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dianhua","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jifei","family":"Deng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4918-9217","authenticated-orcid":false,"given":"Jie","family":"Sun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2026.115166_bib1","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.127251","article-title":"Outsmarting algorithms: a comparative battle between reinforcement learning and heuristics in atari tetris","volume":"277","author":"Bairaktaris","year":"2025","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.engappai.2026.115166_bib2","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.110998","article-title":"Localizing state space for visual reinforcement learning in noisy environments","volume":"156","author":"Cheng","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115166_bib3","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1016\/j.ins.2023.03.019","article-title":"Offline reinforcement learning for industrial process control: a case from steel","volume":"632","author":"Deng","year":"2023","journal-title":"Inf. Sci."},{"key":"10.1016\/j.engappai.2026.115166_bib4","article-title":"A reinforcement learning integral sliding mode control scheme against lumped disturbances in hot strip rolling","volume":"465","author":"Ding","year":"2024","journal-title":"Appl. Math. Comput."},{"key":"10.1016\/j.engappai.2026.115166_bib5","doi-asserted-by":"crossref","DOI":"10.1016\/j.cie.2025.111342","article-title":"Deep reinforcement learning for data-driven scheduling in multi-variety and small-batch flexible job shops: integrating fluid models for enhanced optimization","volume":"208","author":"Ding","year":"2025","journal-title":"Comput. Ind. Eng."},{"key":"10.1016\/j.engappai.2026.115166_bib6","doi-asserted-by":"crossref","first-page":"1545","DOI":"10.1109\/TPWRS.2024.3423381","article-title":"Real-time scheduling of high-penetrated renewable power systems: an expert knowledge and reinforcement learning hybrid approach","volume":"40","author":"Du","year":"2025","journal-title":"IEEE Trans. Power Syst."},{"key":"10.1016\/j.engappai.2026.115166_bib7","doi-asserted-by":"crossref","first-page":"6584","DOI":"10.1109\/TNNLS.2021.3082568","article-title":"Distributional soft actor-critic: off-policy reinforcement learning for addressing value estimation errors","volume":"33","author":"Duan","year":"2022","journal-title":"IEEE Transact. Neural Networks Learn. Syst."},{"key":"10.1016\/j.engappai.2026.115166_bib8","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2025.112128","article-title":"Distributionally robust LQG control under distributed uncertainty","volume":"174","author":"Falconi","year":"2025","journal-title":"Automatica"},{"key":"10.1016\/j.engappai.2026.115166_bib9","doi-asserted-by":"crossref","first-page":"16864","DOI":"10.1109\/TASE.2025.3579744","article-title":"Time-varying optimal sliding-mode lag formation control for high-order nonlinear multiagent systems based on reinforcement learning","volume":"22","author":"Fu","year":"2025","journal-title":"IEEE Trans. Autom. Sci. Eng."},{"key":"10.1016\/j.engappai.2026.115166_bib10","unstructured":"Fujimoto, S., van Hoof, H., & Meger, D. Addressing function approximation error in actor-critic methods. In J. Dy & A. Krause (Eds.), Proceedings of the 35th International Conference on Machine Learning (ICML 2018) (Vol. 80, pp. 1587-1596)."},{"key":"10.1016\/j.engappai.2026.115166_bib11","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1016\/j.jprocont.2025.103510","article-title":"An integrated performance degradation detection and recovery scheme incorporating 2-DOF controllers for feedback control systems","volume":"153","author":"Gao","year":"2025","journal-title":"J. Process Control"},{"key":"10.1016\/j.engappai.2026.115166_bib12","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.110791","article-title":"Autonomous control of soft robots using safe reinforcement learning and covariance matrix adaptation","volume":"153","author":"Garg","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115166_bib13","doi-asserted-by":"crossref","first-page":"5323","DOI":"10.1109\/TCYB.2024.3388470","article-title":"Actor-critic with synthesis loss for solving approximation biases","volume":"54","author":"Guo","year":"2024","journal-title":"IEEE Trans. Cybern."},{"key":"10.1016\/j.engappai.2026.115166_bib14","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., & Levine, S. Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In J. Dy & A. Krause (Eds.), Proceedings of the 35th International Conference on Machine Learning (ICML 2018) (Vol. 80, pp. 1861-1870)."},{"key":"10.1016\/j.engappai.2026.115166_bib15","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1007\/s44430-025-00004-2","article-title":"Model-free trajectory tracking control of a 5-DOF Mitsubishi robotic arm using deep deterministic policy gradient algorithm","volume":"1","author":"Hazem","year":"2025","journal-title":"Discover Robotics"},{"key":"10.1016\/j.engappai.2026.115166_bib16","doi-asserted-by":"crossref","first-page":"56","DOI":"10.3390\/automation6040056","article-title":"A hybrid reinforcement learning framework combining TD3 and PID control for robust trajectory tracking of a 5-dof robotic arm","volume":"6","author":"Hazem","year":"2025","journal-title":"Automation"},{"key":"10.1016\/j.engappai.2026.115166_bib17","doi-asserted-by":"crossref","first-page":"1982","DOI":"10.1007\/s41315-025-00475-x","article-title":"Reinforcement learning-based intelligent trajectory tracking for a 5-DOF Mitsubishi robotic arm: comparative evaluation of DDPG, LC-DDPG, and TD3-ADX","volume":"9","author":"Hazem","year":"2025","journal-title":"International Journal of Intelligent Robotics and Applications"},{"key":"10.1016\/j.engappai.2026.115166_bib18","doi-asserted-by":"crossref","first-page":"206","DOI":"10.1016\/j.isatra.2021.12.030","article-title":"Distributed model predictive control based on neighborhood optimization for thickness and tension control system in tandem cold rolling mill","volume":"129","author":"Hu","year":"2022","journal-title":"ISA (Instrum. Soc. Am.) Trans."},{"key":"10.1016\/j.engappai.2026.115166_bib19","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.110443","article-title":"An effective exploration method based on N-step updated Dirichlet distribution and Dempster-Shafer theory for deep reinforcement learning","volume":"149","author":"Huang","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115166_bib20","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.129168","article-title":"Reinforcement Learning in action: powering intelligent intrusion responses to advanced cyber threats in realistic scenarios","volume":"296","author":"Iturbe","year":"2026","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.engappai.2026.115166_bib21","doi-asserted-by":"crossref","first-page":"54","DOI":"10.3390\/lubricants13020054","article-title":"Intelligent analysis and optimization of lubrication status factor based on dynamically loaded roll gap in cold strip rolling","volume":"13","author":"Jin","year":"2025","journal-title":"Lubricants"},{"key":"10.1016\/j.engappai.2026.115166_bib22","first-page":"56","article-title":"Interpretable machine learning-based rolling force prediction using multivariate industrial data during tandem cold rolling","volume":"6","author":"Li","year":"2025","journal-title":"Ironmak. Steelmak."},{"key":"10.1016\/j.engappai.2026.115166_bib23","first-page":"97","article-title":"Quadcopter trajectory tracking based on model predictive path integral control and neural network","volume":"9","author":"Li","year":"2025","journal-title":"Drones"},{"key":"10.1016\/j.engappai.2026.115166_bib24","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.110812","article-title":"Automatic inverse design of second-order differential metasurfaces based on reinforcement learning","volume":"153","author":"Li","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115166_bib25","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"ArXiv"},{"key":"10.1016\/j.engappai.2026.115166_bib26","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.110676","article-title":"Continuous reinforcement learning via advantage value difference reward shaping: a proximal policy optimization perspective","volume":"151","author":"Lin","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115166_bib27","doi-asserted-by":"crossref","first-page":"1542","DOI":"10.1109\/TKDE.2025.3528219","article-title":"Adversarial conservative alternating Q-learning for credit card debt collection","volume":"37","author":"Liu","year":"2025","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.engappai.2026.115166_bib28","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.111486","article-title":"Dynamic visual attention-based neuron awakening and shifting in deep reinforcement learning","volume":"158","author":"Ma","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115166_bib29","doi-asserted-by":"crossref","DOI":"10.1016\/j.oceaneng.2025.121595","article-title":"Physical system modeling and optimized control strategy of 6-DOF vessel motion simulator based on MBD and LBM","volume":"334","author":"Ma","year":"2025","journal-title":"Ocean. Eng."},{"key":"10.1016\/j.engappai.2026.115166_bib30","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1109\/TAC.2024.3417717","article-title":"Maximization of gain\/phase margins by PID control","volume":"70","author":"Mao","year":"2025","journal-title":"IEEE Trans. Automat. Control"},{"key":"10.1016\/j.engappai.2026.115166_bib31","unstructured":"Mnih, V., Badia, A. P., Mirza, M., Graves, A., Harley, T., & Lillicrap, T. P. Asynchronous methods for deep reinforcement learning. In M. F. Balcan & K. Q. Weinberger (Eds.), Proceedings of the 33rd International Conference on Machine Learning (ICML 2016) (Vol. 48, pp. 1928-1937)."},{"key":"10.1016\/j.engappai.2026.115166_bib32","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.127180","article-title":"Application of Soft Actor-Critic algorithms in optimizing wastewater treatment with time delays integration","volume":"277","author":"Mohammadi","year":"2025","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.engappai.2026.115166_bib33","doi-asserted-by":"crossref","first-page":"187","DOI":"10.1016\/j.jmapro.2024.05.081","article-title":"Modeling imaged welding process dynamic behaviors using generative adversarial network (GAN) for a new foundation to monitor weld penetration using deep learning","volume":"124","author":"Mucllari","year":"2024","journal-title":"J. Manuf. Process."},{"key":"10.1016\/j.engappai.2026.115166_bib34","doi-asserted-by":"crossref","first-page":"1522","DOI":"10.1109\/JAS.2024.124746","article-title":"Efficient knowledge-guided self-evolving intelligent behavioral control for autonomous vehicles","volume":"12","author":"Peng","year":"2025","journal-title":"IEEE\/CAA J. Autom. Sin."},{"key":"10.1016\/j.engappai.2026.115166_bib35","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2024.108695","article-title":"A novel deep ensemble reinforcement learning based control method for strip flatness in cold rolling steel industry","volume":"134","author":"Peng","year":"2024","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115166_bib36","doi-asserted-by":"crossref","first-page":"10071","DOI":"10.1109\/TIE.2025.3546360","article-title":"Generalized discretization of state-space model for discrete-time small-signal analysis of inverters with arbitrary control delay","volume":"72","author":"Qian","year":"2025","journal-title":"IEEE Trans. Ind. Electron."},{"key":"10.1016\/j.engappai.2026.115166_bib37","unstructured":"Schulman, J., Levine, S., Moritz, P., Jordan, M., & Abbeel, P. Trust region policy optimization. In F. Bach & D. Blei (Eds.), Proceedings of the 32nd International Conference on Machine Learning (ICML 2015) (Vol. 37, pp. 1889-1897)."},{"key":"10.1016\/j.engappai.2026.115166_bib38","doi-asserted-by":"crossref","unstructured":"Song, W. T., Ning, J., & Tong, S. C. (2025). Inverse Q-learning optimal control for Takagi-Sugeno fuzzy systems. IEEE Trans. Fuzzy Syst., 33, 2308-2320. https:\/\/doi.org\/10.3390\/drones9010009.","DOI":"10.1109\/TFUZZ.2025.3563361"},{"key":"10.1016\/j.engappai.2026.115166_bib39","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.111124","article-title":"CTPR: contrastive transition predictive representation for reinforcement learning","volume":"156","author":"Sun","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115166_bib40","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.joes.2022.05.015","article-title":"Proportional-integral-derivative controller optimization by particle swarm optimization and back propagation neural network for a parallel stabilized platform in marine operations","volume":"10","author":"Tang","year":"2025","journal-title":"J. Ocean Eng. Sci."},{"key":"10.1016\/j.engappai.2026.115166_bib41","doi-asserted-by":"crossref","first-page":"1860","DOI":"10.1109\/TCYB.2025.3538787","article-title":"Multiplayer differential games of Markov jump systems via reinforcement learning","volume":"55","author":"Wu","year":"2025","journal-title":"IEEE Trans. Cybern."},{"key":"10.1016\/j.engappai.2026.115166_bib42","doi-asserted-by":"crossref","first-page":"16864","DOI":"10.1109\/TNNLS.2025.3557930","article-title":"A two-stage selective experience replay for double-actor deep reinforcement learning","volume":"36","author":"Xu","year":"2025","journal-title":"IEEE Transact. Neural Networks Learn. Syst."},{"key":"10.1016\/j.engappai.2026.115166_bib43","doi-asserted-by":"crossref","DOI":"10.1016\/j.cie.2025.111362","article-title":"Research on the dynamic scheduling problem of flexible job batch shop based on parallel proximal policy optimization algorithm","volume":"207","author":"Yan","year":"2025","journal-title":"Comput. Ind. Eng."},{"key":"10.1016\/j.engappai.2026.115166_bib44","doi-asserted-by":"crossref","DOI":"10.1016\/j.jprocont.2025.103435","article-title":"Adaptive fuzzy-bilateral prescribed performance control for nonlinear systems with uncertain time delays and its application","volume":"150","author":"Yang","year":"2025","journal-title":"J. Process Control"},{"key":"10.1016\/j.engappai.2026.115166_bib45","doi-asserted-by":"crossref","first-page":"10533","DOI":"10.1109\/TASE.2024.3524472","article-title":"Distributed intelligent control method based on state self-learning and its application in cascade processes","volume":"22","author":"Yin","year":"2025","journal-title":"IEEE Trans. Autom. Sci. Eng."},{"key":"10.1016\/j.engappai.2026.115166_bib46","first-page":"261","article-title":"Prediction of transverse thickness difference in medium- and high-carbon steels based on transformer and optimisation of rolling schedules","volume":"52","author":"Yuan","year":"2025","journal-title":"Ironmak. Steelmak."},{"key":"10.1016\/j.engappai.2026.115166_bib47","doi-asserted-by":"crossref","first-page":"4576","DOI":"10.1109\/TII.2025.3545048","article-title":"Robotic disassembly skill acquisition based on reinforcement learning with external knowledge injection","volume":"21","author":"Zang","year":"2025","journal-title":"IEEE Trans. Ind. Inf."},{"key":"10.1016\/j.engappai.2026.115166_bib48","doi-asserted-by":"crossref","first-page":"7277","DOI":"10.1007\/s00170-022-09239-4","article-title":"DDPG-based continuous thickness and tension coupling control for the unsteady cold rolling process","volume":"120","author":"Zeng","year":"2022","journal-title":"Int. J. Adv. Manuf. Technol."},{"key":"10.1016\/j.engappai.2026.115166_bib49","doi-asserted-by":"crossref","first-page":"6123","DOI":"10.1007\/s00170-025-16331-y","article-title":"Multi-agent constraint reinforcement learning-based distributed control strategies for thickness and tension on tandem cold rolling system","volume":"139","author":"Zhao","year":"2025","journal-title":"Int. J. Adv. Manuf. Technol."},{"key":"10.1016\/j.engappai.2026.115166_bib50","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.112409","article-title":"Adaptive resource management in dynamic cyber-physical systems using artificial intelligence","volume":"162","author":"Zhao","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115166_bib51","article-title":"Deep deterministic policy gradient algorithm based on dung beetle optimization and priority experience replay mechanism","volume":"15","author":"Zhu","year":"2025","journal-title":"Sci. Rep."}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626014491?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626014491?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T06:47:15Z","timestamp":1781592435000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626014491"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":51,"alternative-id":["S0952197626014491"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.115166","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Dual-agent reinforcement learning network with knowledge injection for multi-objective control of tandem cold rolling","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.115166","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"115166"}}