{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T04:15:35Z","timestamp":1772338535100,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,24]]},"DOI":"10.1145\/3687272.3688298","type":"proceedings-article","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T00:24:28Z","timestamp":1732062268000},"page":"287-295","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["``Give Me an Example Like This'': Episodic Active Reinforcement Learning from Demonstrations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-2195-5224","authenticated-orcid":false,"given":"Muhan","family":"Hou","sequence":"first","affiliation":[{"name":"Vrije Universiteit Amsterdam (VU Amsterdam), Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5707-5236","authenticated-orcid":false,"given":"Koen","family":"Hindriks","sequence":"additional","affiliation":[{"name":"Vrije Universiteit Amsterdam (VU Amsterdam), Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3106-4213","authenticated-orcid":false,"given":"Guszti","family":"Eiben","sequence":"additional","affiliation":[{"name":"Vrije Universiteit Amsterdam (VU Amsterdam), Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4381-4234","authenticated-orcid":false,"given":"Kim","family":"Baraka","sequence":"additional","affiliation":[{"name":"Vrije Universiteit Amsterdam (VU Amsterdam), Netherlands"}]}],"member":"320","published-online":{"date-parts":[[2024,11,24]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proc. of the ICML workshop on new developments in imitation learning. Citeseer.","author":"Cakmak Maya","year":"2011","unstructured":"Maya Cakmak and Andrea\u00a0L Thomaz. 2011. Active learning with mixed query types in learning from demonstration. In Proc. of the ICML workshop on new developments in imitation learning. Citeseer."},{"key":"e_1_3_2_1_2_1","volume-title":"AI and HCI Workshop at the 40th International Conference on Machine Learning (ICML)","author":"Chen Ming-Hsin","year":"2023","unstructured":"Ming-Hsin Chen, Si-An Chen, and Hsuan-Tien Lin. 2023. Active Reinforcement Learning from Demonstration in Continuous Action Spaces. In AI and HCI Workshop at the 40th International Conference on Machine Learning (ICML), Honolulu, Hawaii, USA. 2023."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05849-4"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.5555\/1622716.1622717"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the 2013 international conference on Autonomous agents and multi-agent systems. 1037\u20131044","author":"Gehring Clement","year":"2013","unstructured":"Clement Gehring and Doina Precup. 2013. Smart exploration in reinforcement learning using absolute temporal difference errors. In Proceedings of the 2013 international conference on Autonomous agents and multi-agent systems. 1037\u20131044."},{"key":"e_1_3_2_1_6_1","unstructured":"Adam Gleave Mohammad Taufeeque Juan Rocamonde Erik Jenner Steven\u00a0H. Wang Sam Toyer Maximilian Ernestus Nora Belrose Scott Emmons and Stuart Russell. 2022. imitation: Clean Imitation Learning Implementations. arXiv:2211.11972v1 [cs.LG]. arxiv:2211.11972\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2211.11972"},{"key":"e_1_3_2_1_7_1","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning. PMLR, 1861\u20131870."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197408"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"e_1_3_2_1_10_1","volume-title":"Generative adversarial imitation learning. Advances in neural information processing systems 29","author":"Ho Jonathan","year":"2016","unstructured":"Jonathan Ho and Stefano Ermon. 2016. Generative adversarial imitation learning. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN57019.2023.10309481"},{"key":"e_1_3_2_1_12_1","volume-title":"Conference on Robot Learning. PMLR, 1764\u20131768","author":"Johns Edward","year":"2022","unstructured":"Edward Johns. 2022. Back to reality for imitation learning. In Conference on Robot Learning. PMLR, 1764\u20131768."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/2627435.2750369"},{"key":"e_1_3_2_1_14_1","volume-title":"International conference on machine learning. PMLR, 2469\u20132478","author":"Kang Bingyi","year":"2018","unstructured":"Bingyi Kang, Zequn Jie, and Jiashi Feng. 2018. Policy optimization with demonstrations. In International conference on machine learning. PMLR, 2469\u20132478."},{"key":"e_1_3_2_1_15_1","volume-title":"Incremental sampling-based algorithms for optimal motion planning. Robotics Science and Systems VI 104, 2","author":"Karaman Sertac","year":"2010","unstructured":"Sertac Karaman and Emilio Frazzoli. 2010. Incremental sampling-based algorithms for optimal motion planning. Robotics Science and Systems VI 104, 2 (2010), 267\u2013274."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793698"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/IV51971.2022.9827073"},{"key":"e_1_3_2_1_18_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin Riedmiller. 2013. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_1_19_1","volume-title":"Awac: Accelerating online reinforcement learning with offline datasets. arXiv preprint arXiv:2006.09359","author":"Nair Ashvin","year":"2020","unstructured":"Ashvin Nair, Abhishek Gupta, Murtaza Dalal, and Sergey Levine. 2020. Awac: Accelerating online reinforcement learning with offline datasets. arXiv preprint arXiv:2006.09359 (2020)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"e_1_3_2_1_21_1","volume-title":"2017 AAAI Spring Symposium Series.","author":"Packard Brandon","year":"2017","unstructured":"Brandon Packard and Santiago Ontan\u00f3n. 2017. Policies for active learning from demonstration. In 2017 AAAI Spring Symposium Series."},{"key":"e_1_3_2_1_22_1","volume-title":"Alvinn: An autonomous land vehicle in a neural network. Advances in neural information processing systems 1","author":"Pomerleau A","year":"1988","unstructured":"Dean\u00a0A Pomerleau. 1988. Alvinn: An autonomous land vehicle in a neural network. Advances in neural information processing systems 1 (1988)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-10085-1"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2970619"},{"key":"e_1_3_2_1_25_1","volume-title":"Prioritized experience replay. arXiv preprint arXiv:1511.05952","author":"Schaul Tom","year":"2015","unstructured":"Tom Schaul, John Quan, Ioannis Antonoglou, and David Silver. 2015. Prioritized experience replay. arXiv preprint arXiv:1511.05952 (2015)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2012.6224757"},{"key":"e_1_3_2_1_27_1","volume-title":"Parrot: Data-driven behavioral priors for reinforcement learning. arXiv preprint arXiv:2011.10024","author":"Singh Avi","year":"2020","unstructured":"Avi Singh, Huihan Liu, Gaoyue Zhou, Albert Yu, Nicholas Rhinehart, and Sergey Levine. 2020. Parrot: Data-driven behavioral priors for reinforcement learning. arXiv preprint arXiv:2011.10024 (2020)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.5555\/2936924.2936990"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/2031678.2031705"},{"key":"e_1_3_2_1_30_1","volume-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards. arXiv preprint arXiv:1707.08817","author":"Vecerik Mel","year":"2017","unstructured":"Mel Vecerik, Todd Hester, Jonathan Scholz, Fumin Wang, Olivier Pietquin, Bilal Piot, Nicolas Heess, Thomas Roth\u00f6rl, Thomas Lampe, and Martin Riedmiller. 2017. Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards. arXiv preprint arXiv:1707.08817 (2017)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Zhaodong Wang and Matthew\u00a0E Taylor. 2017. Improving Reinforcement Learning with Confidence-Based Demonstrations.. In IJCAI. 3027\u20133033.","DOI":"10.24963\/ijcai.2017\/422"}],"event":{"name":"HAI '24: International Conference on Human-Agent Interaction","location":"Swansea United Kingdom","acronym":"HAI '24","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 12th International Conference on Human-Agent Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3687272.3688298","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3687272.3688298","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:37:54Z","timestamp":1755866274000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3687272.3688298"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,24]]},"references-count":31,"alternative-id":["10.1145\/3687272.3688298","10.1145\/3687272"],"URL":"https:\/\/doi.org\/10.1145\/3687272.3688298","relation":{},"subject":[],"published":{"date-parts":[[2024,11,24]]},"assertion":[{"value":"2024-11-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}