{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T15:38:25Z","timestamp":1769269105319,"version":"3.49.0"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,10,1]]},"DOI":"10.1109\/iros55552.2023.10342165","type":"proceedings-article","created":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T19:17:55Z","timestamp":1702495075000},"page":"3865-3872","source":"Crossref","is-referenced-by-count":10,"title":["Switching Head-Tail Funnel UNITER for Dual Referring Expression Comprehension with Fetch-and-Carry Tasks"],"prefix":"10.1109","author":[{"given":"Ryosuke","family":"Korekata","sequence":"first","affiliation":[{"name":"Keio University,Yokohama,Kanagawa,Japan,223-8522"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Motonari","family":"Kambara","sequence":"additional","affiliation":[{"name":"Keio University,Yokohama,Kanagawa,Japan,223-8522"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Yoshida","sequence":"additional","affiliation":[{"name":"Keio University,Yokohama,Kanagawa,Japan,223-8522"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shintaro","family":"Ishikawa","sequence":"additional","affiliation":[{"name":"Keio University,Yokohama,Kanagawa,Japan,223-8522"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yosuke","family":"Kawasaki","sequence":"additional","affiliation":[{"name":"Keio University,Yokohama,Kanagawa,Japan,223-8522"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masaki","family":"Takahashi","sequence":"additional","affiliation":[{"name":"Keio University,Yokohama,Kanagawa,Japan,223-8522"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Komei","family":"Sugiura","sequence":"additional","affiliation":[{"name":"Keio University,Yokohama,Kanagawa,Japan,223-8522"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1186\/s40648-019-0132-3"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01075"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00387"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3178804"},{"key":"ref5","article-title":"Prompter: Utilizing Large Language Model Prompting for a Data Efficient Embodied Instruction Following","author":"Inoue","year":"2022","journal-title":"arXiv preprint"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460699"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2926223"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2963649"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3108500"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2015.08.002"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2019.1663608"},{"key":"ref12","article-title":"LM-Nav: Robotic Navigation with Large Pre- Trained Models of Language, Vision, and Action","author":"Shah","year":"2022","journal-title":"CoRL"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160969"},{"key":"ref14","first-page":"894","article-title":"CLIPORT: What and Where Pathways for Robotic Manipulation","author":"Shridhar","year":"2022","journal-title":"CoRL"},{"key":"ref15","article-title":"Modularity through Attention: Efficient Training and Transfer of Language-Conditioned Policies for Robot Manipulation","author":"Zhou","year":"2022","journal-title":"CoRL"},{"key":"ref16","first-page":"14 829","article-title":"Simple but Effective: CLIP Embed-dings for Embodied AI","author":"Khandelwal","year":"2022","journal-title":"CVPR"},{"key":"ref17","article-title":"Do As I Can, Not As I Say: Grounding Language in Robotic Affordances","author":"Ahn","year":"2022","journal-title":"arXiv preprint"},{"key":"ref18","article-title":"On Evaluation of Embod-ied Navigation Agents","author":"Anderson","year":"2018","journal-title":"arXiv preprint"},{"key":"ref19","article-title":"Attention Is All You Need","volume":"30","author":"Vaswani","year":"2017","journal-title":"NeurIPS"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01604"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR56361.2022.9956163"},{"key":"ref22","article-title":"VTNet: Visual Transformer Network for Object Goal Navigation","author":"Du","year":"2021","journal-title":"ICLR"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812027"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.11688"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2021.07.009"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-022-1369-5"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"ref28","first-page":"23 318","article-title":"OFA: Unifying Architectures, Tasks, and Modalities Through a Simple Sequence-to-Sequence Learning Framework","author":"Wang","year":"2022","journal-title":"ICML"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01762"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01139"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_5"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2020.3010735"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01212"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref36","first-page":"4171","article-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","author":"Devlin","year":"2019","journal-title":"NAACL-HLT"},{"key":"ref37","first-page":"4271","article-title":"Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing","volume":"33","author":"Dai","year":"2020","journal-title":"NeurIPS"},{"key":"ref38","first-page":"8748","article-title":"Learning Transferable Visual Models From Natural Language Supervision","author":"Radford","year":"2021","journal-title":"ICML"},{"key":"ref39","year":"2020","journal-title":"World Robot Summit 2020 Partner Robot Challenge Real Space Rules & Regulations"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2015.2448951"}],"event":{"name":"2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Detroit, MI, USA","start":{"date-parts":[[2023,10,1]]},"end":{"date-parts":[[2023,10,5]]}},"container-title":["2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10341341\/10341342\/10342165.pdf?arnumber=10342165","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,20]],"date-time":"2023-12-20T00:15:54Z","timestamp":1703031354000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10342165\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,1]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/iros55552.2023.10342165","relation":{},"subject":[],"published":{"date-parts":[[2023,10,1]]}}}