{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T11:39:59Z","timestamp":1769773199951,"version":"3.49.0"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032159861","type":"print"},{"value":"9783032159878","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-15987-8_22","type":"book-chapter","created":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T20:28:02Z","timestamp":1769718482000},"page":"331-346","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Dead-End Discovery and\u00a0Secure Exploration via\u00a0Large Language Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6123-6496","authenticated-orcid":false,"given":"Christian Delgado","family":"Polar","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9128-0188","authenticated-orcid":false,"given":"Leliane Nunes de","family":"Barros","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0330-3931","authenticated-orcid":false,"given":"Valdinei","family":"Freire","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9120-8987","authenticated-orcid":false,"given":"Karina Valdivia","family":"Delgado","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,30]]},"reference":[{"key":"22_CR1","unstructured":"Cao, Y., et al.: Survey on large language model-enhanced reinforcement learning: concept, taxonomy, and methods. IEEE Trans. Neural Networks Learn. Syst., 1\u201321 (2024)"},{"key":"22_CR2","doi-asserted-by":"crossref","unstructured":"Crispino, G.N., Freire, V., Delgado, K.V.: GUBS criterion: arbitrary trade-offs between cost and probability-to-goal in stochastic planning based on expected utility theory. Artif. Intell. 316(C) (2023)","DOI":"10.1016\/j.artint.2022.103848"},{"key":"22_CR3","unstructured":"Du, Y., et al.: Guiding pretraining in reinforcement learning with large language models. In: International Conference on Machine Learning, vol.\u00a0202, pp. 8657\u20138677 (2023)"},{"key":"22_CR4","unstructured":"Fatemi, M., Killian, T.W., Subramanian, J., Ghassemi, M.: Medical dead-ends and learning to identify high-risk states and treatments. In: NIPS (2021)"},{"key":"22_CR5","unstructured":"Fatemi, M., Sharma, S., Van\u00a0Seijen, H., Kahou, S.E.: Dead-ends and secure exploration in reinforcement learning. In: International Conference on Machine Learning, vol.\u00a097, pp. 1873\u20131881 (2019)"},{"key":"22_CR6","doi-asserted-by":"crossref","unstructured":"Freire, V., Delgado, K.V., Reis, W.A.S.: An exact algorithm to make a trade-off between cost and probability in SSPs. In: ICAPS 2019, vol.\u00a029, pp. 146\u2013154 (2019)","DOI":"10.1609\/icaps.v29i1.3470"},{"key":"22_CR7","unstructured":"Garc\u00eda, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16(1), 1437\u20131480 (2015)"},{"key":"22_CR8","doi-asserted-by":"crossref","unstructured":"Geibel, P., Wysotzki, F.: Risk-sensitive reinforcement learning applied to control under constraints. J. Artif. Intell. Res. 24, 81\u2013108 (2005)","DOI":"10.1613\/jair.1666"},{"key":"22_CR9","unstructured":"Gemini Team et al.: Gemini: a family of highly capable multimodal models. arXiv e-prints arXiv:2312.11805 (2023)"},{"key":"22_CR10","unstructured":"Hendrycks, D., et al.: Moral scenarios for reinforcement learning agents (2021)"},{"key":"22_CR11","unstructured":"Killian, T.W., Parbhoo, S., Ghassemi, M.: Risk sensitive dead-end identification in safety-critical offline reinforcement learning. Trans. Mach. Learn. Res. (2023)"},{"key":"22_CR12","unstructured":"Kolobov, A., Mausam, Weld, D.S.: A theory of goal-oriented MDPs with dead ends. In: Conference on Uncertainty in Artificial Intelligence, pp. 438\u2013447 (2012)"},{"key":"22_CR13","doi-asserted-by":"crossref","unstructured":"Kolobov, A., Mausam, M., Weld, D., Geffner, H.: Heuristic search for generalized stochastic shortest path MDPs. In: ICAPS 2011, vol.\u00a021, pp. 130\u2013137 (2011)","DOI":"10.1609\/icaps.v21i1.13452"},{"key":"22_CR14","unstructured":"Kwon, M., Xie, S.M., Bullard, K., Sadigh, D.: Reward design with language models. ArXiv abs\/2303.00001 (2023)"},{"key":"22_CR15","unstructured":"Micheli, V., Alonso, E., Fleuret, F.: Transformers are sample efficient world models. In: The Eleventh International Conference on Learning Representations (2022)"},{"issue":"9","key":"22_CR16","doi-asserted-by":"publisher","first-page":"1379","DOI":"10.1016\/S0005-1098(01)00084-X","volume":"37","author":"SD Patek","year":"2001","unstructured":"Patek, S.D.: On terminating Markov decision processes with a risk-averse objective function. Automatica 37(9), 1379\u20131386 (2001)","journal-title":"Automatica"},{"key":"22_CR17","doi-asserted-by":"crossref","unstructured":"Polar, C.D., Delgado, K.V., Freire, V.: Reinforcement learning with utility-based semantic for goals. In: Paes, A., Verri, F.A.N. (eds.) Intelligent Systems, pp. 354\u2013369 (2025)","DOI":"10.1007\/978-3-031-79032-4_25"},{"key":"22_CR18","doi-asserted-by":"crossref","unstructured":"Puterman, M.L.: Markov Decision Processes. Wiley (1994)","DOI":"10.1002\/9780470316887"},{"key":"22_CR19","doi-asserted-by":"crossref","unstructured":"Teichteil-Konigsbuch, F., Vidal, V., Infantes, G.: Extending classical planning heuristics to probabilistic planning with dead-ends. In: AAAI Conference on Artificial Intelligence (2011)","DOI":"10.1609\/aaai.v25i1.8016"},{"key":"22_CR20","unstructured":"Thomas, G., Luo, Y., Ma, T.: Safe reinforcement learning by imagining the near future. In: International Conference on Neural Information Processing Systems (2021)"},{"key":"22_CR21","unstructured":"Wang, Z., et al.: $$\\rm E^{2}CFD$$: towards effective and efficient cost function design for safe reinforcement learning via large language model. arXiv (2024)"},{"key":"22_CR22","unstructured":"Yu, W., et al.: Language to rewards for robotic skill synthesis. In: Conference on Robot Learning, vol.\u00a0229, pp. 374\u2013404 (2023)"},{"issue":"1","key":"22_CR23","doi-asserted-by":"publisher","first-page":"491","DOI":"10.1109\/LRA.2023.3333248","volume":"9","author":"X Zhang","year":"2024","unstructured":"Zhang, X., et al.: Safe reinforcement learning with dead-ends avoidance and recovery. IEEE Rob. Autom. Lett. 9(1), 491\u2013498 (2024)","journal-title":"IEEE Rob. Autom. Lett."}],"container-title":["Lecture Notes in Computer Science","Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-15987-8_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T20:28:07Z","timestamp":1769718487000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-15987-8_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032159861","9783032159878"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-15987-8_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"30 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"BRACIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazilian Conference on Intelligent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Fortaleza-CE","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazil","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"bracis2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/bracis.sbc.org.br\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}