{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T20:30:54Z","timestamp":1757622654019,"version":"3.44.0"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030865191"},{"type":"electronic","value":"9783030865207"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86520-7_35","type":"book-chapter","created":{"date-parts":[[2021,9,9]],"date-time":"2021-09-09T15:25:48Z","timestamp":1631201148000},"page":"567-582","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Reconnaissance for Reinforcement Learning with Safety Constraints"],"prefix":"10.1007","author":[{"given":"Shin-ichi","family":"Maeda","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hayato","family":"Watahiki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi","family":"Ouyang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shintarou","family":"Okada","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masanori","family":"Koyama","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Prabhat","family":"Nagarajan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,9,10]]},"reference":[{"key":"35_CR1","unstructured":"Achiam, J., Held, D., Tamar, A., Abbeel, P.: Constrained policy optimization. In: ICML, pp. 22\u201331 (2017)"},{"key":"35_CR2","doi-asserted-by":"crossref","unstructured":"Akametalu, A.K., Fisac, J.F., Gillula, J.H., Kaynama, S., Zeilinger, M.N., Tomlin, C.J.: Reachability-based safe learning with Gaussian processes. In: CDC, pp. 1424\u20131431 (2014)","DOI":"10.1109\/CDC.2014.7039601"},{"key":"35_CR3","volume-title":"Constrained Markov Decision Processes","author":"E Altman","year":"1999","unstructured":"Altman, E.: Constrained Markov Decision Processes, vol. 7. CRC Press, Boca Raton (1999)"},{"key":"35_CR4","doi-asserted-by":"crossref","unstructured":"Ames, A.D., Coogan, S., Egerstedt, M., Notomista, G., Sreenath, K., Tabuada, P.: Control barrier functions: theory and applications. In: ECC, pp. 3420\u20133431 (2019)","DOI":"10.23919\/ECC.2019.8796030"},{"key":"35_CR5","doi-asserted-by":"crossref","unstructured":"Bansal, S., Chen, M., Herbert, S.L., Tomlin, C.J.: Hamilton-Jacobi reachability: a brief overview and recent advances. In: CDC, pp. 2242\u20132253 (2017)","DOI":"10.1109\/CDC.2017.8263977"},{"key":"35_CR6","volume-title":"Model Predictive Control: Theory and Design","author":"RJ Blake","year":"2009","unstructured":"Blake, R.J., Mayne David, Q.: Model Predictive Control: Theory and Design. Nob Hill Pub., Madison (2009)"},{"key":"35_CR7","unstructured":"Brockman, G., et al.: OpenAI gym (2016)"},{"issue":"1","key":"35_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1504\/IJAAC.2011.037377","volume":"5","author":"O Cetin","year":"2011","unstructured":"Cetin, O., Kurnaz, S., Kaynak, O., Temeltas, H.: Potential field-based navigation task for autonomous flight control of unmanned aerial vehicles. Int. J. Autom. Control 5(1), 1\u201321 (2011)","journal-title":"Int. J. Autom. Control"},{"key":"35_CR9","doi-asserted-by":"crossref","unstructured":"Chang, P., Mertz, C.: Monte Carlo sampling based imminent collision detection algorithm. In: ICTIS, pp. 368\u2013376 (2017)","DOI":"10.1109\/ICTIS.2017.8047791"},{"key":"35_CR10","doi-asserted-by":"crossref","unstructured":"Chen, M., Herbert, S., Tomlin, C.J.: Fast reachable set approximations via state decoupling disturbances. In: CDC, pp. 191\u2013196 (2016)","DOI":"10.1109\/CDC.2016.7798268"},{"key":"35_CR11","doi-asserted-by":"crossref","unstructured":"Chen, M., Herbert, S., Tomlin, C.J.: Exact and efficient Hamilton-Jacobi-based guaranteed safety analysis via system decomposition. In: ICRA (2017)","DOI":"10.1109\/ICRA.2017.7989015"},{"key":"35_CR12","unstructured":"Chow, Y., Ghavamzadeh, M., Janson, L., Pavone, M.: Risk-constrained reinforcement learning with percentile risk criteria. JMLR (2018)"},{"key":"35_CR13","unstructured":"Chow, Y., Nachum, O., Duenez-Guzman, E., Ghavamzadeh, M.: A Lyapunov-based approach to safe reinforcement learning. In: NeurIPS (2018)"},{"key":"35_CR14","unstructured":"Chow, Y., Nachum, O., Faust, A., Ghavamzadeh, M., Duenez-Guzman, E.: Lyapunov-based safe policy optimization for continuous control. In: ICML (2019)"},{"issue":"3","key":"35_CR15","doi-asserted-by":"publisher","first-page":"1018","DOI":"10.1109\/TCST.2013.2272179","volume":"22","author":"S Di Cairano","year":"2013","unstructured":"Di Cairano, S., Bernardini, D., Bemporad, A., Kolmanovsky, I.V.: Stochastic MPC with learning for driver-predictive vehicle control and its application to HEV energy management. IEEE Trans. Control Syst. Technol. 22(3), 1018\u20131031 (2013)","journal-title":"IEEE Trans. Control Syst. Technol."},{"key":"35_CR16","unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., Lopez, A., Koltun, V.: CARLA: an open urban driving simulator. In: CoRL, pp. 1\u201316 (2017)"},{"issue":"1","key":"35_CR17","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1109\/TITS.2007.909241","volume":"9","author":"A Eidehall","year":"2008","unstructured":"Eidehall, A., Petersson, L.: Statistical threat assessment for general road scenes using Monte Carlo sampling. IEEE Trans. Intell. Transp. Syst. 9(1), 137\u2013147 (2008)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"7","key":"35_CR18","doi-asserted-by":"publisher","first-page":"2737","DOI":"10.1109\/TAC.2018.2876389","volume":"64","author":"JF Fisac","year":"2019","unstructured":"Fisac, J.F., Akametalu, A.K., Zeilinger, M.N., Kaynama, S., Gillula, J., Tomlin, C.J.: A general safety framework for learning-based control in uncertain robotic systems. IEEE Trans. Autom. Control 64(7), 2737\u20132752 (2019)","journal-title":"IEEE Trans. Autom. Control"},{"issue":"5","key":"35_CR19","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1109\/70.880813","volume":"16","author":"SS Ge","year":"2000","unstructured":"Ge, S.S., Cui, Y.J.: New potential functions for mobile robot path planning. IEEE Trans. Robot. Autom. 16(5), 615\u2013620 (2000)","journal-title":"IEEE Trans. Robot. Autom."},{"key":"35_CR20","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"646","DOI":"10.1007\/11871842_63","volume-title":"Machine Learning: ECML 2006","author":"P Geibel","year":"2006","unstructured":"Geibel, P.: Reinforcement learning for MDPs with constraints. In: F\u00fcrnkranz, J., Scheffer, T., Spiliopoulou, M. (eds.) ECML 2006. LNCS (LNAI), vol. 4212, pp. 646\u2013653. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11871842_63"},{"issue":"2","key":"35_CR21","doi-asserted-by":"publisher","first-page":"952","DOI":"10.1109\/TVT.2016.2555853","volume":"66","author":"J Ji","year":"2016","unstructured":"Ji, J., Khajepour, A., Melek, W.W., Huang, Y.: Path planning and tracking for vehicle collision avoidance based on model predictive control with multiconstraints. IEEE Trans. Veh. Technol. 66(2), 952\u2013964 (2016)","journal-title":"IEEE Trans. Veh. Technol."},{"key":"35_CR22","doi-asserted-by":"crossref","unstructured":"Koller, T., Berkenkamp, F., Turchetta, M., Krause, A.: Learning-based model predictive control for safe exploration. In: CDC, pp. 6059\u20136066 (2018)","DOI":"10.1109\/CDC.2018.8619572"},{"issue":"1","key":"35_CR23","first-page":"99","volume":"27","author":"CP Lam","year":"2010","unstructured":"Lam, C.P., Chou, C.T., Chiang, K.H., Fu, L.C.: Human-centered robot navigation-towards a harmoniously human-robot coexisting environment. T-RO 27(1), 99\u2013112 (2010)","journal-title":"T-RO"},{"issue":"4","key":"35_CR24","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1068\/p050437","volume":"5","author":"DN Lee","year":"1976","unstructured":"Lee, D.N.: A theory of visual control of braking based on information about time-to-collision. Perception 5(4), 437\u2013459 (1976)","journal-title":"Perception"},{"key":"35_CR25","volume-title":"Predictive Control: With Constraints","author":"JM Maciejowski","year":"2002","unstructured":"Maciejowski, J.M.: Predictive Control: With Constraints. Pearson Education, London (2002)"},{"issue":"7540","key":"35_CR26","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"35_CR27","unstructured":"Moldovan, T.M., Abbeel, P.: Safe exploration in Markov decision processes. In: ICML (2012)"},{"issue":"8","key":"35_CR28","doi-asserted-by":"publisher","first-page":"1415","DOI":"10.1109\/TAC.2007.902736","volume":"52","author":"S Prajna","year":"2007","unstructured":"Prajna, S., Jadbabaie, A., Pappas, G.J.: A framework for worst-case and stochastic safety verification using barrier certificates. IEEE Trans. Autom. Control 52(8), 1415\u20131428 (2007)","journal-title":"IEEE Trans. Autom. Control"},{"issue":"5","key":"35_CR29","doi-asserted-by":"publisher","first-page":"1255","DOI":"10.1109\/TITS.2016.2604240","volume":"18","author":"Y Rasekhipour","year":"2016","unstructured":"Rasekhipour, Y., Khajepour, A., Chen, S.K., Litkouhi, B.: A potential field-based model predictive path-planning controller for autonomous road vehicles. IEEE Trans. Intell. Transp. Syst. 18(5), 1255\u20131267 (2016)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"35_CR30","doi-asserted-by":"crossref","unstructured":"Maeda, S., Watahiki, H., Ouyang, Y., Okada, S., Koyama, M., Nagarajan, P.: Supplementary of reconnaissance for reinforcement learning with safety constraints (2021). https:\/\/github.com\/pfnet-research\/rp-safe-rl","DOI":"10.1007\/978-3-030-86520-7_35"},{"key":"35_CR31","doi-asserted-by":"crossref","unstructured":"Summers, S., Kamgarpour, M., Lygeros, J., Tomlin, C.: A stochastic reach-avoid problem with random obstacles. In: 14th International Conference on Hybrid Systems: Computation and Control, pp. 251\u2013260 (2011)","DOI":"10.1145\/1967701.1967738"},{"key":"35_CR32","doi-asserted-by":"crossref","unstructured":"Wabersich, K.P., Zeilinger, M.N.: Linear model predictive safety certification for learning-based control. In: CDC, pp. 7130\u20137135 (2018)","DOI":"10.1109\/CDC.2018.8619829"},{"issue":"2","key":"35_CR33","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1109\/TCST.2009.2017934","volume":"18","author":"Y Wang","year":"2010","unstructured":"Wang, Y., Boyd, S.: Fast model predictive control using online optimization. IEEE Trans. Control Syst. Technol. 18(2), 267\u2013278 (2010)","journal-title":"IEEE Trans. Control Syst. Technol."},{"issue":"6","key":"35_CR34","doi-asserted-by":"publisher","first-page":"2034","DOI":"10.1109\/TCST.2016.2642164","volume":"25","author":"T Weiskircher","year":"2017","unstructured":"Weiskircher, T., Wang, Q., Ayalew, B.: Predictive guidance and control framework for (semi-) autonomous vehicles in public traffic. IEEE Trans. Control Syst. Technol. 25(6), 2034\u20132046 (2017)","journal-title":"IEEE Trans. Control Syst. Technol."},{"key":"35_CR35","doi-asserted-by":"crossref","unstructured":"Wolf, M.T., Burdick, J.W.: Artificial potential functions for highway driving with collision avoidance. In: ICRA, pp. 3731\u20133736 (2008)","DOI":"10.1109\/ROBOT.2008.4543783"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86520-7_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T22:05:00Z","timestamp":1757369100000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86520-7_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030865191","9783030865207"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86520-7_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"10 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bilbao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2021.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"869","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"210","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held online due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}