{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:10:01Z","timestamp":1755875401716,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,8]],"date-time":"2023-12-08T00:00:00Z","timestamp":1701993600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,8]]},"DOI":"10.1145\/3638584.3638661","type":"proceedings-article","created":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T11:15:19Z","timestamp":1710414919000},"page":"46-52","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Improve the efficiency of deep reinforcement learning through semantic exploration guided by natural language"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3416-7848","authenticated-orcid":false,"given":"Zhourui","family":"Guo","sequence":"first","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China and Institute of Automation,Chinese Academy of Sciences, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8782-5918","authenticated-orcid":false,"given":"Meng","family":"Yao","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2711-7746","authenticated-orcid":false,"given":"Yang","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China and Institute of Automation,Chinese Academy of Sciences, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3442-6275","authenticated-orcid":false,"given":"Qiyue","family":"Yin","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China and Institute of Automation,Chinese Academy of Sciences, China"}]}],"member":"320","published-online":{"date-parts":[[2024,3,14]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Grounding language to autonomously-acquired skills via goal generation. arXiv preprint arXiv:2006.07185","author":"Akakzia Ahmed","year":"2020","unstructured":"Ahmed Akakzia, C\u00e9dric Colas, Pierre-Yves Oudeyer, Mohamed Chetouani, and Olivier Sigaud. 2020. Grounding language to autonomously-acquired skills via goal generation. arXiv preprint arXiv:2006.07185 (2020)."},{"key":"e_1_3_2_1_2_1","volume-title":"Learning to understand goal specifications by modelling reward. arXiv preprint arXiv:1806.01946","author":"Bahdanau Dzmitry","year":"2018","unstructured":"Dzmitry Bahdanau, Felix Hill, Jan Leike, Edward Hughes, Arian Hosseini, Pushmeet Kohli, and Edward Grefenstette. 2018. Learning to understand goal specifications by modelling reward. arXiv preprint arXiv:1806.01946 (2018)."},{"key":"e_1_3_2_1_3_1","volume-title":"Learning with amigo: Adversarially motivated intrinsic goals. arXiv preprint arXiv:2006.12122","author":"Campero Andres","year":"2020","unstructured":"Andres Campero, Roberta Raileanu, Heinrich K\u00fcttler, Joshua\u00a0B Tenenbaum, Tim Rockt\u00e4schel, and Edward Grefenstette. 2020. Learning with amigo: Adversarially motivated intrinsic goals. arXiv preprint arXiv:2006.12122 (2020)."},{"key":"e_1_3_2_1_4_1","first-page":"12478","article-title":"Eager: Asking and answering questions for automatic reward shaping in language-guided rl","volume":"35","author":"Carta Thomas","year":"2022","unstructured":"Thomas Carta, Pierre-Yves Oudeyer, Olivier Sigaud, and Sylvain Lamprier. 2022. Eager: Asking and answering questions for automatic reward shaping in language-guided rl. Advances in Neural Information Processing Systems 35 (2022), 12478\u201312490.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_5_1","volume-title":"ACTRCE: Augmenting Experience via Teacher\u2019s Advice For Multi-Goal Reinforcement Learning. arXiv preprint arXiv:1902.04546","author":"Chan Harris","year":"2019","unstructured":"Harris Chan, Yuhuai Wu, Jamie Kiros, Sanja Fidler, and Jimmy Ba. 2019. ACTRCE: Augmenting Experience via Teacher\u2019s Advice For Multi-Goal Reinforcement Learning. arXiv preprint arXiv:1902.04546 (2019)."},{"key":"e_1_3_2_1_6_1","first-page":"3761","article-title":"Language as a cognitive tool to imagine goals in curiosity driven exploration","volume":"33","author":"Colas C\u00e9dric","year":"2020","unstructured":"C\u00e9dric Colas, Tristan Karch, Nicolas Lair, Jean-Michel Dussoux, Cl\u00e9ment Moulin-Frier, Peter Dominey, and Pierre-Yves Oudeyer. 2020. Language as a cognitive tool to imagine goals in curiosity driven exploration. Advances in Neural Information Processing Systems 33 (2020), 3761\u20133774.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_7_1","volume-title":"From language to goals: Inverse reinforcement learning for vision-based instruction following. arXiv preprint arXiv:1902.07742","author":"Fu Justin","year":"2019","unstructured":"Justin Fu, Anoop Korattikara, Sergey Levine, and Sergio Guadarrama. 2019. From language to goals: Inverse reinforcement learning for vision-based instruction following. arXiv preprint arXiv:1902.07742 (2019)."},{"key":"e_1_3_2_1_8_1","volume-title":"Conference on Robot Learning. PMLR, 485\u2013497","author":"Goyal Prasoon","year":"2021","unstructured":"Prasoon Goyal, Scott Niekum, and Raymond Mooney. 2021. Pixl2r: Guiding reinforcement learning using natural language by mapping pixels to rewards. In Conference on Robot Learning. PMLR, 485\u2013497."},{"key":"e_1_3_2_1_9_1","volume-title":"Using natural language for reward shaping in reinforcement learning. arXiv preprint arXiv:1903.02020","author":"Goyal Prasoon","year":"2019","unstructured":"Prasoon Goyal, Scott Niekum, and Raymond\u00a0J Mooney. 2019. Using natural language for reward shaping in reinforcement learning. arXiv preprint arXiv:1903.02020 (2019)."},{"key":"e_1_3_2_1_10_1","volume-title":"Policy shaping: Integrating human feedback with reinforcement learning. Advances in neural information processing systems 26","author":"Griffith Shane","year":"2013","unstructured":"Shane Griffith, Kaushik Subramanian, Jonathan Scholz, Charles\u00a0L Isbell, and Andrea\u00a0L Thomaz. 2013. Policy shaping: Integrating human feedback with reinforcement learning. Advances in neural information processing systems 26 (2013)."},{"key":"e_1_3_2_1_11_1","first-page":"16899","article-title":"Dynamic population-based meta-learning for multi-agent communication with natural language","volume":"34","author":"Gupta Abhinav","year":"2021","unstructured":"Abhinav Gupta, Marc Lanctot, and Angeliki Lazaridou. 2021. Dynamic population-based meta-learning for multi-agent communication with natural language. Advances in Neural Information Processing Systems 34 (2021), 16899\u201316912.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_12_1","volume-title":"International Conference on Machine Learning. PMLR, 4051\u20134062","author":"Hanjie W","year":"2021","unstructured":"Austin\u00a0W Hanjie, Victor\u00a0Y Zhong, and Karthik Narasimhan. 2021. Grounding language to entities and dynamics for generalization in reinforcement learning. In International Conference on Machine Learning. PMLR, 4051\u20134062."},{"key":"e_1_3_2_1_13_1","volume-title":"Grounded language learning in a simulated 3d world. arXiv preprint arXiv:1706.06551","author":"Hermann Karl\u00a0Moritz","year":"2017","unstructured":"Karl\u00a0Moritz Hermann, Felix Hill, Simon Green, Fumin Wang, Ryan Faulkner, Hubert Soyer, David Szepesvari, Wojciech\u00a0Marian Czarnecki, Max Jaderberg, Denis Teplyashin, 2017. Grounded language learning in a simulated 3d world. arXiv preprint arXiv:1706.06551 (2017)."},{"key":"e_1_3_2_1_14_1","volume-title":"Language as an abstraction for hierarchical deep reinforcement learning. Advances in Neural Information Processing Systems 32","author":"Jiang Yiding","year":"2019","unstructured":"Yiding Jiang, Shixiang\u00a0Shane Gu, Kevin\u00a0P Murphy, and Chelsea Finn. 2019. Language as an abstraction for hierarchical deep reinforcement learning. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.215"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/1622737.1622748"},{"key":"e_1_3_2_1_17_1","volume-title":"Grounded Question Answering for Curiosity-Driven Exploration. arXiv preprint arXiv:2104.11902","author":"Kaur Jivat\u00a0Neet","year":"2021","unstructured":"Jivat\u00a0Neet Kaur, Yiding Jiang, and Paul\u00a0Pu Liang. 2021. Ask & Explore: Grounded Question Answering for Curiosity-Driven Exploration. arXiv preprint arXiv:2104.11902 (2021)."},{"key":"e_1_3_2_1_18_1","volume-title":"Emergent multi-agent communication in the deep learning era. arXiv preprint arXiv:2006.02419","author":"Lazaridou Angeliki","year":"2020","unstructured":"Angeliki Lazaridou and Marco Baroni. 2020. Emergent multi-agent communication in the deep learning era. arXiv preprint arXiv:2006.02419 (2020)."},{"key":"e_1_3_2_1_19_1","volume-title":"Multi-agent communication meets natural language: Synergies between functional and structural language learning. arXiv preprint arXiv:2005.07064","author":"Lazaridou Angeliki","year":"2020","unstructured":"Angeliki Lazaridou, Anna Potapenko, and Olivier Tieleman. 2020. Multi-agent communication meets natural language: Synergies between functional and structural language learning. arXiv preprint arXiv:2005.07064 (2020)."},{"key":"e_1_3_2_1_20_1","volume-title":"Deal or no deal? end-to-end learning for negotiation dialogues. arXiv preprint arXiv:1706.05125","author":"Lewis Mike","year":"2017","unstructured":"Mike Lewis, Denis Yarats, Yann\u00a0N Dauphin, Devi Parikh, and Dhruv Batra. 2017. Deal or no deal? end-to-end learning for negotiation dialogues. arXiv preprint arXiv:1706.05125 (2017)."},{"key":"e_1_3_2_1_21_1","volume-title":"Inferring rewards from language in context. arXiv preprint arXiv:2204.02515","author":"Lin Jessy","year":"2022","unstructured":"Jessy Lin, Daniel Fried, Dan Klein, and Anca Dragan. 2022. Inferring rewards from language in context. arXiv preprint arXiv:2204.02515 (2022)."},{"key":"e_1_3_2_1_22_1","volume-title":"International Conference on Machine Learning. PMLR, 14073\u201314093","author":"Liu Iou-Jen","year":"2022","unstructured":"Iou-Jen Liu, Xingdi Yuan, Marc-Alexandre C\u00f4t\u00e9, Pierre-Yves Oudeyer, and Alexander Schwing. 2022. Asking for knowledge (afk): Training rl agents to query external knowledge using language. In International Conference on Machine Learning. PMLR, 14073\u201314093."},{"key":"e_1_3_2_1_23_1","volume-title":"On the interaction between supervision and self-play in emergent communication. arXiv preprint arXiv:2002.01093","author":"Lowe Ryan","year":"2020","unstructured":"Ryan Lowe, Abhinav Gupta, Jakob Foerster, Douwe Kiela, and Joelle Pineau. 2020. On the interaction between supervision and self-play in emergent communication. arXiv preprint arXiv:2002.01093 (2020)."},{"key":"e_1_3_2_1_24_1","volume-title":"A survey of reinforcement learning informed by natural language. arXiv preprint arXiv:1906.03926","author":"Luketina Jelena","year":"2019","unstructured":"Jelena Luketina, Nantas Nardelli, Gregory Farquhar, Jakob Foerster, Jacob Andreas, Edward Grefenstette, Shimon Whiteson, and Tim Rockt\u00e4schel. 2019. A survey of reinforcement learning informed by natural language. arXiv preprint arXiv:1906.03926 (2019)."},{"key":"e_1_3_2_1_25_1","first-page":"29529","article-title":"Ella: Exploration through learned language abstraction","volume":"34","author":"Mirchandani Suvir","year":"2021","unstructured":"Suvir Mirchandani, Siddharth Karamcheti, and Dorsa Sadigh. 2021. Ella: Exploration through learned language abstraction. Advances in Neural Information Processing Systems 34 (2021), 29529\u201329540.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_26_1","first-page":"33947","article-title":"Improving intrinsic exploration with language abstractions","volume":"35","author":"Mu Jesse","year":"2022","unstructured":"Jesse Mu, Victor Zhong, Roberta Raileanu, Minqi Jiang, Noah Goodman, Tim Rockt\u00e4schel, and Edward Grefenstette. 2022. Improving intrinsic exploration with language abstractions. Advances in Neural Information Processing Systems 35 (2022), 33947\u201333960.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_27_1","volume-title":"Conference on Robot Learning. PMLR, 1303\u20131315","author":"Nair Suraj","year":"2022","unstructured":"Suraj Nair, Eric Mitchell, Kevin Chen, Silvio Savarese, Chelsea Finn, 2022. Learning language-conditioned robot behavior from offline data and crowd-sourced annotation. In Conference on Robot Learning. PMLR, 1303\u20131315."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.11263"},{"key":"e_1_3_2_1_29_1","volume-title":"Ride: Rewarding impact-driven exploration for procedurally-generated environments. arXiv preprint arXiv:2002.12292","author":"Raileanu Roberta","year":"2020","unstructured":"Roberta Raileanu and Tim Rockt\u00e4schel. 2020. Ride: Rewarding impact-driven exploration for procedurally-generated environments. arXiv preprint arXiv:2002.12292 (2020)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i7.16749"},{"key":"e_1_3_2_1_31_1","first-page":"25377","article-title":"Semantic exploration from language abstractions and pretrained representations","volume":"35","author":"Tam Allison","year":"2022","unstructured":"Allison Tam, Neil Rabinowitz, Andrew Lampinen, Nicholas\u00a0A Roy, Stephanie Chan, DJ Strouse, Jane Wang, Andrea Banino, and Felix Hill. 2022. Semantic exploration from language abstractions and pretrained representations. Advances in Neural Information Processing Systems 35 (2022), 25377\u201325389.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_32_1","volume-title":"Influencing reinforcement learning through natural language guidance. arXiv preprint arXiv:2104.01506","author":"Tasrin Tasmia","year":"2021","unstructured":"Tasmia Tasrin, Md\u00a0Sultan\u00a0Al Nahian, Habarakadage Perera, and Brent Harrison. 2021. Influencing reinforcement learning through natural language guidance. arXiv preprint arXiv:2104.01506 (2021)."},{"volume-title":"Artificial intelligence and machine learning for multi-domain operations applications, Vol.\u00a011006","author":"Waytowich Nicholas","key":"e_1_3_2_1_33_1","unstructured":"Nicholas Waytowich, Sean\u00a0L Barton, Vernon Lawhern, Ethan Stump, and Garrett Warnell. 2019. Grounding natural language commands to StarCraft II game states for narration-guided reinforcement learning. In Artificial intelligence and machine learning for multi-domain operations applications, Vol.\u00a011006. SPIE, 267\u2013276."},{"key":"e_1_3_2_1_34_1","volume-title":"A narration-based reward shaping approach using grounded natural language commands. arXiv preprint arXiv:1911.00497","author":"Waytowich Nicholas","year":"2019","unstructured":"Nicholas Waytowich, Sean\u00a0L Barton, Vernon Lawhern, and Garrett Warnell. 2019. A narration-based reward shaping approach using grounded natural language commands. arXiv preprint arXiv:1911.00497 (2019)."},{"key":"e_1_3_2_1_35_1","volume-title":"Interactive grounded language acquisition and generalization in a 2d world. arXiv preprint arXiv:1802.01433","author":"Yu Haonan","year":"2018","unstructured":"Haonan Yu, Haichao Zhang, and Wei Xu. 2018. Interactive grounded language acquisition and generalization in a 2d world. arXiv preprint arXiv:1802.01433 (2018)."},{"key":"e_1_3_2_1_36_1","first-page":"21505","article-title":"Silg: The multi-domain symbolic interactive language grounding benchmark","volume":"34","author":"Zhong Victor","year":"2021","unstructured":"Victor Zhong, Austin\u00a0W Hanjie, Sida Wang, Karthik Narasimhan, and Luke Zettlemoyer. 2021. Silg: The multi-domain symbolic interactive language grounding benchmark. Advances in Neural Information Processing Systems 34 (2021), 21505\u201321519.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_37_1","volume-title":"Rtfm: Generalising to novel environment dynamics via reading. arXiv preprint arXiv:1910.08210","author":"Zhong Victor","year":"2019","unstructured":"Victor Zhong, Tim Rockt\u00e4schel, and Edward Grefenstette. 2019. Rtfm: Generalising to novel environment dynamics via reading. arXiv preprint arXiv:1910.08210 (2019)."}],"event":{"name":"CSAI 2023: 2023 7th International Conference on Computer Science and Artificial Intelligence","acronym":"CSAI 2023","location":"Beijing China"},"container-title":["Proceedings of the 2023 7th International Conference on Computer Science and Artificial Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3638584.3638661","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3638584.3638661","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T14:55:05Z","timestamp":1755874505000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3638584.3638661"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,8]]},"references-count":37,"alternative-id":["10.1145\/3638584.3638661","10.1145\/3638584"],"URL":"https:\/\/doi.org\/10.1145\/3638584.3638661","relation":{},"subject":[],"published":{"date-parts":[[2023,12,8]]},"assertion":[{"value":"2024-03-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}