{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T23:14:47Z","timestamp":1743030887514,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031553257"},{"type":"electronic","value":"9783031553264"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-55326-4_8","type":"book-chapter","created":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T08:46:10Z","timestamp":1710405970000},"page":"164-186","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Data-Efficient Offline Reinforcement Learning with\u00a0Approximate Symmetries"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1878-5833","authenticated-orcid":false,"given":"Giorgio","family":"Angelotti","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0002-9973","authenticated-orcid":false,"given":"Nicolas","family":"Drougard","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3578-4186","authenticated-orcid":false,"given":"Caroline P. C.","family":"Chanel","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,15]]},"reference":[{"key":"8_CR1","unstructured":"Abel, D., Umbanhowar, N., Khetarpal, K., Arumugam, D., Precup, D., Littman, M.: Value preserving state-action abstractions. In: International Conference on Artificial Intelligence and Statistics, pp. 1639\u20131650. PMLR (2020)"},{"key":"8_CR2","unstructured":"Angelotti, G., Drougard, N., Chanel, C.P.C.: Offline learning for planning: a summary. In: Proceedings of the 1st Workshop on Bridging the Gap Between AI Planning and Reinforcement Learning at the 30th International Conference on Automated Planning and Scheduling, pp. 153\u2013161 (2020)"},{"key":"8_CR3","doi-asserted-by":"publisher","unstructured":"Angelotti, G., Drougard, N., Chanel, C.P.C.: Expert-guided symmetry detection in markov decision processes. In: Proceedings of the 14th International Conference on Agents and Artificial Intelligence, vol. 2: ICAART, pp. 88\u201398. INSTICC, SciTePress (2022). https:\/\/doi.org\/10.5220\/0010783400003116","DOI":"10.5220\/0010783400003116"},{"key":"8_CR4","doi-asserted-by":"publisher","unstructured":"Angelotti, G., Drougard, N., Chanel, C.P.C.: Data augmentation through expert-guided symmetry detection to improve performance in offline reinforcement learning. In: Proceedings of the 15th International Conference on Agents and Artificial Intelligence, vol. 2: ICAART, pp. 115\u2013124. INSTICC, SciTePress (2023). https:\/\/doi.org\/10.5220\/0011633400003393","DOI":"10.5220\/0011633400003393"},{"key":"8_CR5","doi-asserted-by":"crossref","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-dynamic programming: an overview. In: Proceedings of 1995 34th IEEE Conference on Decision and Control, vol. 1, pp. 560\u2013564. IEEE (1995)","DOI":"10.1109\/CDC.1995.478953"},{"key":"8_CR6","unstructured":"Brockman, G., et al.: Openai gym (2016). arXiv:1606.01540"},{"key":"8_CR7","doi-asserted-by":"publisher","unstructured":"Castro, P.S.: Scalable methods for computing state similarity in deterministic markov decision processes. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 10069\u201310076 (2020). https:\/\/doi.org\/10.1609\/aaai.v34i06.6564","DOI":"10.1609\/aaai.v34i06.6564"},{"key":"8_CR8","unstructured":"Dean, T., Givan, R.: Model minimization in markov decision processes. In: AAAI\/IAAI, pp. 106\u2013111 (1997)"},{"key":"8_CR9","unstructured":"Dinh, L., Krueger, D., Bengio, Y.: NICE: non-linear independent components estimation. In: Proceedings of the 3rd International Conference on Learning Representations (2015). http:\/\/arxiv.org\/abs\/1410.8516"},{"key":"8_CR10","unstructured":"Ferns, N., Panangaden, P., Precup, D.: Metrics for finite markov decision processes. In: Conference on Uncertainty in Artificial Intelligence, vol. 4, pp. 162\u2013169 (2004)"},{"issue":"1\u20132","key":"8_CR11","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1016\/S0004-3702(02)00376-4","volume":"147","author":"R Givan","year":"2003","unstructured":"Givan, R., Dean, T., Greig, M.: Equivalence notions and model minimization in Markov decision processes. Artif. Intell. 147(1\u20132), 163\u2013223 (2003)","journal-title":"Artif. Intell."},{"key":"8_CR12","unstructured":"Grathwohl, W., Chen, R.T.Q., Bettencourt, J., Sutskever, I., Duvenaud, D.: FFJORD: free-form continuous dynamics for scalable reversible generative models. In: Proceedings of the 7th International Conference on Learning Representations (2019). https:\/\/openreview.net\/forum?id=rJxgknCcK7"},{"key":"8_CR13","doi-asserted-by":"publisher","first-page":"3964","DOI":"10.1109\/TPAMI.2020.2992934","volume":"43","author":"I Kobyzev","year":"2020","unstructured":"Kobyzev, I., Prince, S., Brubaker, M.: Normalizing flows: an introduction and review of current methods. IEEE Trans. Pattern Anal. Mach. Intell. 43, 3964\u20133979 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"8_CR14","first-page":"20132","volume":"34","author":"A Kumar","year":"2020","unstructured":"Kumar, A., Zhou, A., Tucker, G., Levine, S.: Conservative q-learning for offline reinforcement learning. Adv. Neural Inf. Process. Syst. 34, 20132\u201320145 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"8_CR15","unstructured":"Levine, S., Kumar, A., Tucker, G., Fu, J.: Offline reinforcement learning: tutorial, review, and perspectives on open problems (2020). arXiv:2005.01643"},{"key":"8_CR16","unstructured":"Li, L., Walsh, T.J., Littman, M.: Towards a unified theory of state abstraction for MDPS. In: Proceedings of the Ninth International Symposium on Artificial Intelligence and Mathematics, pp. 531\u2013539 (2006)"},{"key":"8_CR17","unstructured":"Mandel, T., Liu, Y.E., Brunskill, E., Popovic, Z.: Efficient bayesian clustering for reinforcement learning. In: Proceedings of the 25th International Joint Conference on Artificial Intelligence, pp. 1830\u20131838 (2016)"},{"issue":"1","key":"8_CR18","first-page":"1","volume":"6","author":"N Mausam","year":"2012","unstructured":"Mausam, N., Kolobov, A.: Planning with Markov decision processes: an AI perspective. Synth. Lect. Artif. Intell. Mach. Learn. 6(1), 1\u2013210 (2012)","journal-title":"Synth. Lect. Artif. Intell. Mach. Learn."},{"issue":"7540","key":"8_CR19","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015). https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"8_CR20","unstructured":"Munos, R.: Error bounds for approximate policy iteration. In: Proceedings of the 20th International Conference on International Conference on Machine Learning, vol. 3, pp. 560\u2013567 (2003)"},{"issue":"2","key":"8_CR21","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1023\/A:1017992615625","volume":"49","author":"R Munos","year":"2002","unstructured":"Munos, R., Moore, A.: Variable resolution discretization in optimal control. Mach. Learn. 49(2), 291\u2013323 (2002)","journal-title":"Mach. Learn."},{"key":"8_CR22","doi-asserted-by":"crossref","unstructured":"Narayanamurthy, S.M., Ravindran, B.: On the hardness of finding symmetries in Markov decision processes. In: Proceedings of the 25th International Conference on International Conference on Machine Learning, pp. 688\u2013695 (2008)","DOI":"10.1145\/1390156.1390243"},{"key":"8_CR23","unstructured":"Paine, T.L., et al.: Hyperparameter selection for offline reinforcement learning (2020). arXiv:2007.09055"},{"key":"8_CR24","doi-asserted-by":"crossref","unstructured":"Prudencio, R.F., Maximo, M.R., Colombini, E.L.: A survey on offline reinforcement learning: taxonomy, review, and open problems. IEEE Trans. Neural Netw. Learn. Syst. (2023)","DOI":"10.1109\/TNNLS.2023.3250269"},{"key":"8_CR25","unstructured":"Ravindran, B., Barto, A.G.: Symmetries and Model Minimization in Markov Decision Processes. Technical report, USA (2001)"},{"key":"8_CR26","unstructured":"Ravindran, B., Barto, A.G.: Approximate homomorphisms: a framework for non-exact minimization in Markov decision processes. In: International Conference on Knowledge Based Computer Systems (2004)"},{"key":"8_CR27","doi-asserted-by":"crossref","unstructured":"Ruan, S.S., Comanici, G., Panangaden, P., Precup, D.: Representation discovery for mdps using bisimulation metrics. In: Twenty-Ninth AAAI Conference on Artificial Intelligence (2015)","DOI":"10.1609\/aaai.v29i1.9701"},{"key":"8_CR28","unstructured":"Takuma Seno, M.I.: d3rlpy: an offline deep reinforcement library. In: NeurIPS 2021 Offline Reinforcement Learning Workshop (2021)"},{"key":"8_CR29","unstructured":"Taylor, J., Precup, D., Panagaden, P.: Bounding performance loss in approximate MDP homomorphisms. Adv. Neural Inf. Process. Syst. 21 (2009). https:\/\/proceedings.neurips.cc\/paper\/2008\/file\/6602294be910b1e3c4571bd98c4d5484-Paper.pdf"},{"key":"8_CR30","unstructured":"van der Pol, E., Kipf, T., Oliehoek, F.A., Welling, M.: Plannable approximations to MDP homomorphisms: equivariance under actions. In: Proceedings of the 19th International Conference on Autonomous Agents and MultiAgent Systems, pp. 1431\u20131439 (2020)"},{"key":"8_CR31","unstructured":"van der Pol, E., Worrall, D., van Hoof, H., Oliehoek, F., Welling, M.: MDP homomorphic networks: group symmetries in reinforcement learning. Adv. Neural Inf. Process. Syst. 33, 4199\u20134210 (2020). https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/2be5f9c2e3620eb73c2972d7552b6cb5-Paper.pdf"},{"issue":"3","key":"8_CR32","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1287\/moor.3.3.231","volume":"3","author":"W Whitt","year":"1978","unstructured":"Whitt, W.: Approximations of dynamic programs, i. Math. Oper. Res. 3(3), 231\u2013243 (1978)","journal-title":"Math. Oper. Res."}],"container-title":["Lecture Notes in Computer Science","Agents and Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-55326-4_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T08:26:30Z","timestamp":1731572790000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-55326-4_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031553257","9783031553264"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-55326-4_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"15 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICAART","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Agents and Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lisbon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 February 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 February 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icaart2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icaart.scitevents.org\/?y=2023","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"PRIMORIS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"306","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"23","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"111","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"8% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}