{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T00:58:00Z","timestamp":1776128280046,"version":"3.50.1"},"reference-count":50,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020YFB1711700"],"award-info":[{"award-number":["2020YFB1711700"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62136003"],"award-info":[{"award-number":["62136003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276097"],"award-info":[{"award-number":["62276097"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62372174"],"award-info":[{"award-number":["62372174"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.eswa.2026.131850","type":"journal-article","created":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T16:50:42Z","timestamp":1773075042000},"page":"131850","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Efficient and safe decision-making in reinforcement learning: One-step anticipatory policy selector with adaptive safety thresholds"],"prefix":"10.1016","volume":"318","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-5253-9775","authenticated-orcid":false,"given":"Jiahao","family":"Pan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6083-3440","authenticated-orcid":false,"given":"Xiang","family":"Feng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1899-1135","authenticated-orcid":false,"given":"Huiqun","family":"Yu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"02","key":"10.1016\/j.eswa.2026.131850_bib0001","first-page":"18","article-title":"Reinforcement learning-based approaches for improving safety and trust in robot-to-robot and human-robot interaction","volume":"16","author":"Abouelyazid","year":"2024","journal-title":"Advances in Urban Resilience and Sustainable City Design"},{"key":"10.1016\/j.eswa.2026.131850_bib0002","series-title":"International conference on machine learning","first-page":"22","article-title":"Constrained policy optimization","author":"Achiam","year":"2017"},{"key":"10.1016\/j.eswa.2026.131850_bib0003","series-title":"53rd IEEE conference on decision and control","first-page":"1424","article-title":"Reachability-based safe learning with gaussian processes","author":"Akametalu","year":"2014"},{"key":"10.1016\/j.eswa.2026.131850_bib0004","article-title":"Safe reinforcement learning with contextual information: Theory and application to personalized comorbidity management","author":"Cao","year":"2023","journal-title":"Available at SSRN 4583667"},{"issue":"5","key":"10.1016\/j.eswa.2026.131850_bib0005","doi-asserted-by":"crossref","first-page":"2979","DOI":"10.1109\/TAC.2023.3240925","article-title":"Learning to act safely with limited exposure and almost sure certainty","volume":"68","author":"Castellano","year":"2023","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"5","key":"10.1016\/j.eswa.2026.131850_bib0006","doi-asserted-by":"crossref","first-page":"2979","DOI":"10.1109\/TAC.2023.3240925","article-title":"Learning to act safely with limited exposure and almost sure certainty","volume":"68","author":"Castellano","year":"2023","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.eswa.2026.131850_bib0007","series-title":"2019 International conference on robotics and automation (ICRA)","first-page":"8973","article-title":"Closing the sim-to-real loop: Adapting simulation randomization with real world experience","author":"Chebotar","year":"2019"},{"key":"10.1016\/j.eswa.2026.131850_bib0008","unstructured":"Chow, Y., Nachum, O., Faust, A., Duenez-Guzman, E., & Ghavamzadeh, M. (2019). Lyapunov-based safe policy optimization for continuous control. arXiv preprint arXiv: 1901.10031."},{"key":"10.1016\/j.eswa.2026.131850_bib0009","series-title":"Conference on robot learning","first-page":"1645","article-title":"Tactile sim-to-real policy transfer via real-to-sim image translation","author":"Church","year":"2022"},{"key":"10.1016\/j.eswa.2026.131850_bib0010","unstructured":"Dalal, G., Dvijotham, K., Vecerik, M., Hester, T., Paduraru, C., & Tassa, Y. (2018). Safe exploration in continuous action spaces. arXiv preprint arXiv: 1801.08757."},{"issue":"1","key":"10.1016\/j.eswa.2026.131850_bib0011","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"Garc\u0131a","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.eswa.2026.131850_bib0012","series-title":"Convex polytopes","volume":"vol. 16","author":"Gr\u00fcnbaum","year":"1967"},{"key":"10.1016\/j.eswa.2026.131850_bib0013","series-title":"2017\u202fIEEE international conference on robotics and automation (ICRA)","first-page":"3389","article-title":"Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates","author":"Gu","year":"2017"},{"key":"10.1016\/j.eswa.2026.131850_bib0014","unstructured":"Hafner, D., Lillicrap, T., Ba, J., & Norouzi, M. (2019a). Dream to control: Learning behaviors by latent imagination. arXiv preprint arXiv: 1912.01603."},{"key":"10.1016\/j.eswa.2026.131850_bib0015","series-title":"International conference on machine learning","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","author":"Hafner","year":"2019"},{"key":"10.1016\/j.eswa.2026.131850_bib0016","series-title":"Linear systems theory","author":"Hespanha","year":"2018"},{"issue":"11","key":"10.1016\/j.eswa.2026.131850_bib0017","doi-asserted-by":"crossref","first-page":"18880","DOI":"10.1109\/TITS.2025.3599260","article-title":"Risk-aware reinforcement learning for non-conservative motion planning in uncertain autonomous driving environments","volume":"26","author":"Hu","year":"2025","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"issue":"4","key":"10.1016\/j.eswa.2026.131850_bib0018","doi-asserted-by":"crossref","first-page":"6748","DOI":"10.1109\/LRA.2020.3011912","article-title":"Deep reinforcement learning for safe local planning of a ground vehicle in unknown rough terrain","volume":"5","author":"Josef","year":"2020","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.eswa.2026.131850_bib0019","series-title":"2020\u202fIEEE 23rd international conference on intelligent transportation systems (ITSC)","first-page":"1","article-title":"Safe reinforcement learning for autonomous lane changing using set-based prediction","author":"Krasowski","year":"2020"},{"key":"10.1016\/j.eswa.2026.131850_bib0020","series-title":"International conference on pattern recognition and artificial intelligence","first-page":"431","article-title":"Adaptive threshold for anomaly detection in atm radar data streams","author":"Krim Rahaoui","year":"2022"},{"key":"10.1016\/j.eswa.2026.131850_bib0021","series-title":"2023\u202fIEEE international conference on bioinformatics and biomedicine (BIBM)","first-page":"3059","article-title":"Adaptive thresholding based on multi-task learning for refining binary medical image segmentation","author":"Lei","year":"2023"},{"key":"10.1016\/j.eswa.2026.131850_bib0022","unstructured":"Li, Q., Peng, Z., & Zhou, B. (2022). Efficient learning of safe driving policy via human-ai copilot optimization. arXiv preprint arXiv: 2202.10341."},{"key":"10.1016\/j.eswa.2026.131850_bib0023","unstructured":"Lillicrap, T. P., Hunt, J. J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2015). Continuous control with deep reinforcement learning. arXiv preprint arXiv: 1509.02971."},{"key":"10.1016\/j.eswa.2026.131850_bib0024","doi-asserted-by":"crossref","first-page":"105756","DOI":"10.1109\/ACCESS.2021.3100007","article-title":"A blood glucose control framework based on reinforcement learning with safety and interpretability: In silico validation","volume":"9","author":"Lim","year":"2021","journal-title":"IEEE Access"},{"issue":"7","key":"10.1016\/j.eswa.2026.131850_bib0025","doi-asserted-by":"crossref","first-page":"3139","DOI":"10.1109\/TKDE.2024.3360640","article-title":"Hierarchical context representation and self-adaptive thresholding for multivariate anomaly detection","volume":"36","author":"Lin","year":"2024","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"10.1016\/j.eswa.2026.131850_bib0026","series-title":"2021\u202fIEEE international conference on real-time computing and robotics (RCAR)","first-page":"423","article-title":"Autonomous mobile robot navigation in uncertain dynamic environments based on deep reinforcement learning","author":"Lu","year":"2021"},{"key":"10.1016\/j.eswa.2026.131850_bib0027","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv: 1312.5602."},{"key":"10.1016\/j.eswa.2026.131850_bib0028","series-title":"Neural Information Processing Systems","first-page":"1054","article-title":"Safe and efficient off-policy reinforcement learning","author":"Munos","year":"2016"},{"issue":"1","key":"10.1016\/j.eswa.2026.131850_bib0029","first-page":"3","article-title":"Towards cost-effective and safe contact-rich robotic manipulation with reinforcement learning: A review of techniques for future industrial automation","volume":"240","author":"Parnada","year":"2026","journal-title":"Proceedings of the Institution of Mechanical Engineers, Part I: Journal of Systems and Control Engineering"},{"key":"10.1016\/j.eswa.2026.131850_bib0030","series-title":"Conference on robot learning","first-page":"1554","article-title":"Safe driving via expert guided policy optimization","author":"Peng","year":"2022"},{"issue":"5","key":"10.1016\/j.eswa.2026.131850_bib0031","doi-asserted-by":"crossref","first-page":"537","DOI":"10.1561\/2200000091","article-title":"Risk-sensitive reinforcement learning via policy gradient search","volume":"15","author":"Prashanth","year":"2022","journal-title":"Found. Trends\u00ae Mach. Learn."},{"key":"10.1016\/j.eswa.2026.131850_bib0032","unstructured":"Ray, A., Achiam, J., & Amodei, D. (2019). Benchmarking safe exploration in deep reinforcement learning. arXiv preprint arXiv: 1910.01708, 7(1), 2."},{"key":"10.1016\/j.eswa.2026.131850_bib0033","unstructured":"Saunders, W., Sastry, G., Stuhlmueller, A., & Evans, O. (2017). Trial without error: Towards safe reinforcement learning via human intervention. arXiv preprint arXiv: 1707.05173."},{"key":"10.1016\/j.eswa.2026.131850_bib0034","series-title":"International conference on machine learning","first-page":"1889","article-title":"Trust region policy optimization","author":"Schulman","year":"2015"},{"key":"10.1016\/j.eswa.2026.131850_bib0035","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). Proximal policy optimization algorithms. arXiv preprint arXiv: 1707.06347."},{"issue":"2","key":"10.1016\/j.eswa.2026.131850_bib0036","doi-asserted-by":"crossref","first-page":"3663","DOI":"10.1109\/LRA.2021.3063989","article-title":"Reachability-based trajectory safeguard (rts): A safe and fast reinforcement learning safety layer for continuous control","volume":"6","author":"Shao","year":"2021","journal-title":"IEEE Robotics and Automation Letters"},{"issue":"7587","key":"10.1016\/j.eswa.2026.131850_bib0037","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"Silver","year":"2016","journal-title":"Nature"},{"key":"10.1016\/j.eswa.2026.131850_bib0038","series-title":"International conference on machine learning","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"Silver","year":"2014"},{"issue":"7676","key":"10.1016\/j.eswa.2026.131850_bib0039","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"Silver","year":"2017","journal-title":"Nature"},{"key":"10.1016\/j.eswa.2026.131850_bib0040","series-title":"2022 International conference on robotics and automation (ICRA)","first-page":"6344","article-title":"Provably safe deep reinforcement learning for robotic manipulation in human environments","author":"Thumm","year":"2022"},{"key":"10.1016\/j.eswa.2026.131850_bib0041","series-title":"2022 International conference on robotics and automation (ICRA)","first-page":"6344","article-title":"Provably safe deep reinforcement learning for robotic manipulation in human environments","author":"Thumm","year":"2022"},{"issue":"7782","key":"10.1016\/j.eswa.2026.131850_bib0042","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grandmaster level in starcraft II using multi-agent reinforcement learning","volume":"575","author":"Vinyals","year":"2019","journal-title":"Nature"},{"key":"10.1016\/j.eswa.2026.131850_bib0043","doi-asserted-by":"crossref","DOI":"10.3389\/fnbot.2023.1271607","article-title":"Learning adaptive reaching and pushing skills using contact information","volume":"17","author":"Wang","year":"2023","journal-title":"Frontiers in Neurorobotics"},{"key":"10.1016\/j.eswa.2026.131850_bib0044","series-title":"2024\u202fIEEE\/RSJ International conference on intelligent robots and systems (IROS)","first-page":"2913","article-title":"Rttf: Rapid tactile transfer framework for contact-rich manipulation tasks","author":"Wu","year":"2024"},{"key":"10.1016\/j.eswa.2026.131850_bib0045","doi-asserted-by":"crossref","first-page":"1045","DOI":"10.1016\/j.ins.2022.07.080","article-title":"An intelligent intervention strategy for patients to prevent chronic complications based on reinforcement learning","volume":"612","author":"You","year":"2022","journal-title":"Information Sciences"},{"issue":"13","key":"10.1016\/j.eswa.2026.131850_bib0046","doi-asserted-by":"crossref","first-page":"23154","DOI":"10.1109\/JIOT.2025.3554521","article-title":"Adaptive safety-certified reinforcement learning for constrained optimal control of autonomous robots with uncertainties","volume":"12","author":"Zhang","year":"2025","journal-title":"IEEE Internet of Things Journal"},{"key":"10.1016\/j.eswa.2026.131850_bib0047","doi-asserted-by":"crossref","DOI":"10.1016\/j.robot.2022.104321","article-title":"Skill generalization of tubular object manipulation with tactile sensing and sim2real learning","volume":"160","author":"Zhao","year":"2023","journal-title":"Robotics and Autonomous Systems"},{"key":"10.1016\/j.eswa.2026.131850_bib0048","series-title":"Learning for dynamics and control","first-page":"336","article-title":"Safe reinforcement learning of control-affine systems with vertex networks","author":"Zheng","year":"2021"},{"key":"10.1016\/j.eswa.2026.131850_bib0049","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"14612","article-title":"Document-level relation extraction with adaptive thresholding and localized context pooling","volume":"vol. 35","author":"Zhou","year":"2021"},{"issue":"12","key":"10.1016\/j.eswa.2026.131850_bib0050","doi-asserted-by":"crossref","first-page":"25201","DOI":"10.1109\/TITS.2022.3213604","article-title":"Navigating robots in dynamic environment with deep reinforcement learning","volume":"23","author":"Zhou","year":"2022","journal-title":"IEEE Transactions on Intelligent Transportation Systems"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426007633?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426007633?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T00:14:37Z","timestamp":1776125677000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417426007633"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":50,"alternative-id":["S0957417426007633"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131850","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Efficient and safe decision-making in reinforcement learning: One-step anticipatory policy selector with adaptive safety thresholds","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131850","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"131850"}}