{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,2]],"date-time":"2026-07-02T16:43:30Z","timestamp":1783010610080,"version":"3.54.6"},"reference-count":57,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128329","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"4337-4344","source":"Crossref","is-referenced-by-count":3,"title":["Robi Butler: Multimodal Remote Interaction with a Household Robot Assistant"],"prefix":"10.1109","author":[{"given":"Anxing","family":"Xiao","sequence":"first","affiliation":[{"name":"School of Computing, National University of Singapore,Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nuwan","family":"Janaka","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore,Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tianrun","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore,Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Anshul","family":"Gupta","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore,Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kaixin","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore,Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Cunjun","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore,Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"David","family":"Hsu","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore,Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Do as i can, not as i say: Grounding language in robotic affordances","volume-title":"Conference on Robot Learning","author":"Brohan","year":"2022"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-023-10139-z"},{"key":"ref3","article-title":"Homerobot: Open-vocabulary mobile manipulation","volume-title":"Conference on Robot Learning","author":"Yenamandra","year":"2023"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2024.xx.091"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/HRI.2010.5453186"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/HRI.2010.5453184"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v25i1.7979"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2014.x.041"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1177\/0278364915602060"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460699"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794441"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919897133"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2021.xvii.020"},{"key":"ref14","article-title":"Cliport: What and where pathways for robotic manipulation","volume-title":"Conference on robot learning","author":"Shridhar","year":"2021"},{"key":"ref15","article-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","volume-title":"International Conference on Machine Learning","author":"Huang","year":"2022"},{"key":"ref16","article-title":"Inner monologue: Embodied reasoning through planning with language models","volume-title":"Conference on Robot Learning","author":"Huang","year":"2022"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.00280"},{"key":"ref18","first-page":"8469","article-title":"Palm-e: an embodied multimodal language model","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Driess","year":"2023"},{"key":"ref19","article-title":"Sayplan: Grounding large language models using 3d scene graphs for scalable robot task planning","volume-title":"Conference on Robot Learning","author":"Rana","year":"2023"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2024.xx.025"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2020.555265"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2008.4651216"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.1145\/2909824.3020249","article-title":"A comparison of remote robot teleoperation interfaces for general object manipulation","volume-title":"Proceedings of the 2017 ACM\/IEEE International Conference on Human-Robot Interaction","author":"Kent","year":"2017"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v28i1.9051"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487507"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00142"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3194683"},{"key":"ref28","article-title":"Gesture-informed robot assistance via foundation models","volume-title":"7th Annual Conference on Robot Learning","author":"Lin","year":"2023"},{"key":"ref29","article-title":"Habitat 2.0: Training home assistants to rearrange their habitat","author":"Szot","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref30","article-title":"igibson 2.0: Object-centric simulation for robot learning of everyday household tasks","volume-title":"Conference on Robot Learning","author":"Li","year":"2021"},{"key":"ref31","article-title":"Behavior: Benchmark for everyday household activities in virtual, interactive, and ecological environments","volume-title":"Conference on Robot Learning","author":"Srivastava","year":"2021"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/S0921-8890(98)00067-0"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2009.5354526"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385907"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560774"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.055"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560757"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2008.4543527"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.adh3834"},{"key":"ref40","article-title":"Open-television: Teleoperation with immersive active visual feedback","volume-title":"Conference on Robot Learning","author":"Cheng","year":"2024"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/SIEDS49339.2020.9106630"},{"key":"ref42","first-page":"1","article-title":"Fetch and freight: Standard platforms for service robot applications","volume-title":"Workshop on autonomous mobile service robots","author":"Wise","year":"2016"},{"key":"ref43","article-title":"Robust speech recognition via large-scale weak super-vision","volume-title":"International conference on machine learning","author":"Radford","year":"2023"},{"key":"ref44","article-title":"Scaling open-vocabulary object detection","volume":"36","author":"Minderer","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561877"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2011.2181749"},{"key":"ref48","author":"Chen","year":"2023","journal-title":"LIm-state: Open world state repre-sentation for long-horizon task planning with large language model"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.016"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2006.889486"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.5771\/9781598889857-395"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611354"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1177\/154193120605000909"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.5948\/upo9781614440260.011"},{"key":"ref55","author":"Ullman","year":"2019","journal-title":"Mdmt: multi-dimensional measure of trust"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1080\/10447310802205776"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2024.xx.066"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128329.pdf?arnumber=11128329","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:07:28Z","timestamp":1756879648000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128329\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":57,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128329","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}