{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T10:41:10Z","timestamp":1766054470995,"version":"3.48.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11247137","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"18430-18437","source":"Crossref","is-referenced-by-count":0,"title":["Is the House Ready For Sleeptime? Generating and Evaluating Situational Queries for Embodied Question Answering"],"prefix":"10.1109","author":[{"given":"Vishnu Sashank","family":"Dorbala","sequence":"first","affiliation":[{"name":"University of Maryland,College Park"}]},{"given":"Prasoon","family":"Goyal","sequence":"additional","affiliation":[{"name":"Amazon AGI"}]},{"given":"Robinson","family":"Piramuthu","sequence":"additional","affiliation":[{"name":"Amazon AGI"}]},{"given":"Michael","family":"Johnston","sequence":"additional","affiliation":[{"name":"Amazon AGI"}]},{"given":"Reza","family":"Ghanadan","sequence":"additional","affiliation":[{"name":"University of Maryland,College Park"}]},{"given":"Dinesh","family":"Manocha","sequence":"additional","affiliation":[{"name":"University of Maryland,College Park"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00886"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00008"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3251984"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00170"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00647"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00430"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3277206"},{"key":"ref8","article-title":"Amazon Mechanical Turk \u2014 mturk.com"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2022.3141105"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160748"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.14778\/3342263.3342271"},{"issue":"6","key":"ref12","article-title":"Language models are unsupervised multitask learners","volume-title":"OpenAI Blog","volume":"23","year":"2020"},{"year":"2023","key":"ref13","article-title":"Gpt-4 technical report"},{"article-title":"Llama: Open and efficient foundation language models","year":"2023","author":"Touvron","key":"ref14"},{"article-title":"Palm: Scaling language modeling with pathways","year":"2022","author":"Chowdhery","key":"ref15"},{"article-title":"Palm-e: An embodied multimodal language model","year":"2023","author":"Driess","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.67"},{"key":"ref18","article-title":"Can an embodied agent find your","author":"Dorbala","year":"2023","journal-title":"cat-shaped mug\"? llm-based zero-shot object navigation,\" arXiv preprint arXiv:2303.03480"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28597"},{"key":"ref20","first-page":"492","article-title":"Lm-nav: Robotic navigation with large pre-trained models of language, vision, and action","volume-title":"Conference on Robot Learning","author":"Shah"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.00280"},{"article-title":"Roberta: A robustly optimized bert pretraining approach","year":"2019","author":"Liu","key":"ref22"},{"article-title":"Modern hierarchical, agglomerative clustering algorithms","year":"2011","author":"M\u00fcllner","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52688.2022.00500"},{"article-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","year":"2023","author":"Li","key":"ref25"},{"key":"ref26","first-page":"12 888","article-title":"Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"International Conference on Machine Learning","author":"Li"},{"key":"ref27","first-page":"5583","article-title":"Vilt: Vision-and-language transformer without convolution or region supervision","volume-title":"International Conference on Machine Learning","author":"Kim"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2025,10,19]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11247137.pdf?arnumber=11247137","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T10:37:40Z","timestamp":1766054260000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11247137\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11247137","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}