{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:46:20Z","timestamp":1742913980297,"version":"3.40.3"},"publisher-location":"Cham","reference-count":18,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319592251"},{"type":"electronic","value":"9783319592268"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-59226-8_11","type":"book-chapter","created":{"date-parts":[[2017,5,19]],"date-time":"2017-05-19T14:34:22Z","timestamp":1495204462000},"page":"105-116","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["An Exploration Strategy for RL with Considerations of Budget and Risk"],"prefix":"10.1007","author":[{"given":"Jonathan","family":"Serrano Cuevas","sequence":"first","affiliation":[]},{"given":"Eduardo","family":"Morales Manzanares","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,5,20]]},"reference":[{"key":"11_CR1","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"11_CR2","volume-title":"Efficient Exploration in Reinforcement Learning","author":"SB Thrun","year":"1992","unstructured":"Thrun, S.B.: Efficient Exploration in Reinforcement Learning. Springer, New York (1992)"},{"issue":"2","key":"11_CR3","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1016\/0004-3702(92)90058-6","volume":"55","author":"S Mahadevan","year":"1992","unstructured":"Mahadevan, S., Connell, J.: Automatic programming of behavior-based robots using reinforcement learning. Artif. Intell. 55(2), 311\u2013365 (1992)","journal-title":"Artif. Intell."},{"key":"11_CR4","doi-asserted-by":"crossref","unstructured":"Nevmyvaka, Y., Feng, Y., Kearns, M.: Reinforcement learning for optimized trade execution. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 673\u2013680. ACM (2006)","DOI":"10.1145\/1143844.1143929"},{"key":"11_CR5","unstructured":"Thomas, P.S.: Safe reinforcement learning (2015)"},{"key":"11_CR6","first-page":"1437","volume":"16","author":"J Garc\u00eda","year":"2015","unstructured":"Garc\u00eda, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16, 1437\u20131480 (2015)","journal-title":"J. Mach. Learn. Res."},{"issue":"2\u20133","key":"11_CR7","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1023\/A:1017940631555","volume":"49","author":"O Mihatsch","year":"2002","unstructured":"Mihatsch, O., Neuneier, R.: Risk-sensitive reinforcement learning. Mach. Learn. 49(2\u20133), 267\u2013290 (2002)","journal-title":"Mach. Learn."},{"key":"11_CR8","doi-asserted-by":"crossref","unstructured":"Heger, M.: Consideration of risk in reinforcement learning. In: Proceedings of the Eleventh International Conference on Machine Learning, pp. 105\u2013111 (1994)","DOI":"10.1016\/B978-1-55860-335-6.50021-0"},{"issue":"2","key":"11_CR9","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1016\/S0005-1098(98)00153-8","volume":"35","author":"SP Coraluppi","year":"1999","unstructured":"Coraluppi, S.P., Marcus, S.I.: Risk-sensitive and minimax control of discrete-time, finite-state Markov decision processes. Automatica 35(2), 301\u2013309 (1999)","journal-title":"Automatica"},{"issue":"3","key":"11_CR10","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1023\/B:MACH.0000039779.47329.3a","volume":"57","author":"K Driessens","year":"2004","unstructured":"Driessens, K., D\u017eeroski, S.: Integrating guidance into relational reinforcement learning. Mach. Learn. 57(3), 271\u2013304 (2004)","journal-title":"Mach. Learn."},{"key":"11_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1007\/978-3-642-04772-5_11","volume-title":"Computer Aided Systems Theory - EUROCAST 2009","author":"JA Mart\u00edn H.","year":"2009","unstructured":"Mart\u00edn H., J.A., Lope, J.: Learning autonomous helicopter flight with evolutionary reinforcement learning. In: Moreno-D\u00edaz, R., Pichler, F., Quesada-Arencibia, A. (eds.) EUROCAST 2009. LNCS, vol. 5717, pp. 75\u201382. Springer, Heidelberg (2009). doi:10.1007\/978-3-642-04772-5_11"},{"key":"11_CR12","unstructured":"Abbeel, P.: Apprenticeship learning and reinforcement learning with application to robotic control. In: ProQuest (2008)"},{"key":"11_CR13","doi-asserted-by":"crossref","first-page":"515","DOI":"10.1613\/jair.3761","volume":"45","author":"J Garcia","year":"2012","unstructured":"Garcia, J., Fern\u00e1ndez, F.: Safe exploration of state and action spaces in reinforcement learning. J. Artif. Intell. Res. 45, 515\u2013564 (2012)","journal-title":"J. Artif. Intell. Res."},{"key":"11_CR14","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: theory and application to reward shaping. In: ICML, vol. 99, pp. 278\u2013287 (1999)"},{"issue":"2","key":"11_CR15","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1016\/0004-3702(94)90047-7","volume":"71","author":"M Dorigo","year":"1994","unstructured":"Dorigo, M., Colombetti, M.: Robot shaping: developing autonomous agents through learning. Artif. Intell. 71(2), 321\u2013370 (1994)","journal-title":"Artif. Intell."},{"key":"11_CR16","unstructured":"Devlin, S., Kudenko, D.: Dynamic potential-based reward shaping. In: Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems, vol. 1, pp. 433\u2013440 (2012)"},{"key":"11_CR17","first-page":"2012","volume":"18","author":"PE Black","year":"2006","unstructured":"Black, P.E.: Manhattan distance. Dict. Algorithms Data Struct. 18, 2012 (2006)","journal-title":"Dict. Algorithms Data Struct."},{"key":"11_CR18","unstructured":"MacGlashan, J.: Brown UMBC reinforcement learning and planning BURLAP. http:\/\/burlap.cs.brown.edu\/. Accessed 5 Jan 2017"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-59226-8_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T17:58:40Z","timestamp":1710266320000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-59226-8_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319592251","9783319592268"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-59226-8_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"20 May 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MCPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Mexican Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Huatulco","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Mexico","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 June 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 June 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mcpr22017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/ccc.inaoep.mx\/~mcpr2017\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}