{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:39:16Z","timestamp":1730266756973,"version":"3.28.0"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,6,30]],"date-time":"2024-06-30T00:00:00Z","timestamp":1719705600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,30]],"date-time":"2024-06-30T00:00:00Z","timestamp":1719705600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,30]]},"DOI":"10.1109\/ijcnn60899.2024.10651083","type":"proceedings-article","created":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T17:35:05Z","timestamp":1725903305000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Modular Method for Embodied Instruction Following with Environmental Context Adaptation"],"prefix":"10.1109","author":[{"given":"Zhuoqun","family":"Xu","sequence":"first","affiliation":[{"name":"Hunan University,College of Computer Science and Electronic Engineering"}]},{"given":"Liubo","family":"Ouyang","sequence":"additional","affiliation":[{"name":"Hunan University,College of Computer Science and Electronic Engineering"}]},{"given":"Yang","family":"Liu","sequence":"additional","affiliation":[{"name":"Samsung Research China,Beijing"}]},{"given":"Li","family":"Zhang","sequence":"additional","affiliation":[{"name":"Hunan University,College of Computer Science and Electronic Engineering"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00008"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01075"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"author":"Szlam","key":"ref4","article-title":"Why Build an Assistant in Minecraft?[J]"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d17-1321"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2965078"},{"article-title":"Do as i can, not as i say: Grounding language in robotic affordances[J]","year":"2022","author":"Ahn","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.368"},{"article-title":"Film: Following instructions in language with modular methods[J]","year":"2021","author":"Min","key":"ref9"},{"key":"ref10","first-page":"706","article-title":"A persistent spatial semantic representation for high-level natural language instruction execution[C]","volume-title":"Conference on Robot Learning","author":"Blukis"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01564"},{"journal-title":"Moca: A modular object-centric approach for interactive instruction following[J]","year":"2020","author":"Singh","key":"ref12"},{"article-title":"Embodied bert: A transformer model for embodied, language-guided visual task completion[J]","year":"2021","author":"Suglia","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20097"},{"key":"ref15","first-page":"6232","article-title":"Task-Driven and Experience-Based Question Answering Corpus for In-Home Robot Application in the House3D Virtual Environment[C]","volume-title":"Proceedings of the Thirteenth Language Resources and Evaluation Conference","author":"Xu"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3178804"},{"article-title":"LEBP\u2013Language Expectation & Binding Policy: A Two-Stream Framework for Embodied Vision-and-Language Interaction Task Learning Agents","year":"2022","author":"Liu","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00447"},{"article-title":"Learning To Explore Using Active Neural SLAM","volume-title":"International Conference on Learning Representations (ICLR)","author":"Chaplot","key":"ref19"},{"article-title":"Ai2-thor: an interactive 3d environment for visual ai","volume-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Kolve","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00430"},{"article-title":"Alfworld: aligning text and embodied environments for interactive learning","volume-title":"Conference on Learning Representations (ICLR)","author":"Shridhar","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00647"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636667"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"issue":"8","key":"ref27","first-page":"17","article-title":"A planning based neural-symbolic approach for embodied instruction following","volume":"9","author":"Liu","year":"2022","journal-title":"Interactions"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00886"},{"volume-title":"Embodied question answering in photorealistic environments with point cloud perception (CVPR)","author":"Wijmans","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3314943"},{"key":"ref31","first-page":"8024","article-title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library","author":"Paszke","year":"2019","journal-title":"NeurIPS"},{"journal-title":"ProcTHOR: Large-Scale Embodied AI Using Procedural Generation","year":"2022","author":"Deitke","key":"ref32"},{"article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","year":"2018","author":"Devlin","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00323"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197008"},{"key":"ref38","first-page":"2005","article-title":"Learning affordance landscapes for interaction exploration in 3d environments[J]","volume":"33","author":"Nagarajan","year":"2020","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"author":"Inoue","key":"ref39","article-title":"Prompter: Utilizing large language model prompting for a data efficient embodied instruction following[J]"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197318"},{"article-title":"Maniskill2: A unified benchmark for generalizable manipulation skills[J]","year":"2023","author":"Gu","key":"ref41"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/iros47612.2022.9981766"},{"article-title":"Threedworld: A platform for interactive multi-modal physical simulation","year":"2020","author":"Gan","key":"ref43"}],"event":{"name":"2024 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2024,6,30]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,7,5]]}},"container-title":["2024 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10649807\/10649898\/10651083.pdf?arnumber=10651083","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T07:01:18Z","timestamp":1725951678000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10651083\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,30]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/ijcnn60899.2024.10651083","relation":{},"subject":[],"published":{"date-parts":[[2024,6,30]]}}}