{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:17:38Z","timestamp":1766067458725},"reference-count":80,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,9,27]]},"DOI":"10.1109\/iros51168.2021.9636208","type":"proceedings-article","created":{"date-parts":[[2021,12,16]],"date-time":"2021-12-16T15:45:38Z","timestamp":1639669538000},"page":"4095-4102","source":"Crossref","is-referenced-by-count":14,"title":["Communicative Learning with Natural Gestures for Embodied Navigation Agents with Human-in-the-Scene"],"prefix":"10.1109","author":[{"given":"Qi","family":"Wu","sequence":"first","affiliation":[]},{"given":"Cheng-Ju","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Yixin","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Jungseock","family":"Joo","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_45"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00116"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00878"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2016.153"},{"article-title":"Objectnav revisited: On eval-uation of embodied agents navigating to objects","year":"2020","author":"batra","key":"ref76"},{"article-title":"On evaluation of embodied navigation agents","year":"2018","author":"anderson","key":"ref77"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00050"},{"article-title":"Alfworld: Aligning text and embodied environments for interactive learning","year":"2020","author":"shridhar","key":"ref39"},{"article-title":"Allenact: A framework for embodied ai research","year":"2020","author":"weihs","key":"ref75"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01315"},{"article-title":"Adam: A method for stochastic optimization","year":"2014","author":"kingma","key":"ref78"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"article-title":"Towards ai-complete question answering: A set of prerequisite toy tasks","year":"2015","author":"weston","key":"ref33"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00684"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1287"},{"key":"ref37","article-title":"Vision-and-dialog navigation","author":"thomason","year":"2020","journal-title":"Conference on Robot Learning"},{"article-title":"Neural modular control for embodied question answering","year":"2018","author":"das","key":"ref36"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00331"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2587640"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793493"},{"journal-title":"Human-robot Interaction A Survey","year":"2008","author":"goodrich","key":"ref62"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref61"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300233"},{"key":"ref28","article-title":"Visual semantic navigation using scene priors","author":"yang","year":"2019","journal-title":"ICLRE"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1521\/soco.2008.26.2.169"},{"key":"ref27","article-title":"Individual vs. joint perception: a pragmatic model of pointing as communicative smithian helping","author":"jiang","year":"2021","journal-title":"CogSci"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/1"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00430"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-00065-7_28"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1063"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-22879-2_53"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-65289-4_44"},{"key":"ref2","article-title":"Automated coding of televised leader displays: Detecting nonverbal political behavior with computer vision and deep learning","author":"joo","year":"2019","journal-title":"International Journal of Communication (19328036)"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s13218-017-0505-9"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.2307\/1130423"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO.2004.1521814"},{"article-title":"Ai2-thor: An interactive 3d environment for visual ai","year":"2017","author":"kolve","key":"ref21"},{"key":"ref24","article-title":"Gesture recognition for humanoid assisted interactive sign language tutoring","author":"ertu?rul","year":"2013","journal-title":"SIU"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2005.12.020"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CAC48633.2019.8997339"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/MIM.2019.8674634"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00886"},{"article-title":"Minos: Multimodal indoor simulator for navigation in complex environments","year":"2017","author":"savva","key":"ref51"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/70.508439"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"article-title":"Chalet: Cornell house agent learning environment","year":"2018","author":"yan","key":"ref57"},{"article-title":"Deepmind lab","year":"2016","author":"beattie","key":"ref56"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2016.7860433"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00081"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00945"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01113"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v25i1.7974","article-title":"Learning to interpret natural language navigation instructions from observations","author":"chen","year":"2011","journal-title":"AAAI"},{"article-title":"Interactive grounded language acquisition and generalization in a 2d world","year":"2018","author":"yu","key":"ref40"},{"article-title":"Gated-attention architectures for task-oriented language grounding","year":"2017","author":"chaplot","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.769"},{"key":"ref14","article-title":"Embodied question answering","author":"das","year":"2018","journal-title":"CVPR Workshops"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00387"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00679"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01282"},{"key":"ref18","article-title":"Speaker-follower models for vision-and-language navigation","author":"fried","year":"2018","journal-title":"NeurIPS"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01003"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2016.00183"},{"key":"ref3","first-page":"788","article-title":"Under-standing political communication styles in televised debates via body movements","author":"kang","year":"2020","journal-title":"European Conference on Computer Vision"},{"journal-title":"Origins of Human Communication","year":"2010","author":"tomasello","key":"ref6"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.eng.2020.01.011"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1515\/lingvan-2017-0041"},{"key":"ref7","article-title":"Multimodal construction grammar","author":"steen","year":"2013","journal-title":"Language and the Creative Mind"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01075"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00723"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_2"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197008"},{"article-title":"Building generalizable agents with a realistic and rich 3d environment","year":"2018","author":"wu","key":"ref48"},{"article-title":"Home: A household multimodal environment","year":"2017","author":"brodeur","key":"ref47"},{"article-title":"Babyai++: Towards grounded-language learning beyond memorization","year":"2020","author":"cao","key":"ref42"},{"key":"ref41","article-title":"Babyai: A platform to study the sample efficiency of grounded language learning","author":"chevalier-boisvert","year":"2018","journal-title":"ICLRE"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3321408.3322633"},{"article-title":"Grounded language learning in a simulated 3d world","year":"2017","author":"hermann","key":"ref43"}],"event":{"name":"2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2021,9,27]]},"location":"Prague, Czech Republic","end":{"date-parts":[[2021,10,1]]}},"container-title":["2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9635848\/9635849\/09636208.pdf?arnumber=9636208","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,18]],"date-time":"2023-01-18T17:39:22Z","timestamp":1674063562000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9636208\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,27]]},"references-count":80,"URL":"https:\/\/doi.org\/10.1109\/iros51168.2021.9636208","relation":{},"subject":[],"published":{"date-parts":[[2021,9,27]]}}}