{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:17:32Z","timestamp":1771949852927,"version":"3.50.1"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128120","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"12430-12436","source":"Crossref","is-referenced-by-count":2,"title":["Space-Aware Instruction Tuning: Dataset and Benchmark for Guide Dog Robots Assisting the Visually Impaired"],"prefix":"10.1109","author":[{"given":"ByungOk","family":"Han","sequence":"first","affiliation":[{"name":"ETRI,Daejeon,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Woo-han","family":"Yun","sequence":"additional","affiliation":[{"name":"ETRI,Daejeon,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Beom-Su","family":"Seo","sequence":"additional","affiliation":[{"name":"ETRI,Daejeon,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jaehong","family":"Kim","sequence":"additional","affiliation":[{"name":"ETRI,Daejeon,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIT.2014.6894906"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561786"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s00146-024-01879-2"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1299\/jamdsm.4.194"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642181"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"ref7","first-page":"23716","article-title":"Flamingo: a visual language model for few-shot learning","volume":"35","author":"Alayrac","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref8","article-title":"Ferret: Refer and ground anything anywhere at any granularity","author":"You","year":"2023","journal-title":"arXiv preprint"},{"key":"ref9","first-page":"19730","article-title":"Blip-2: Bootstrapping languageimage pre-training with frozen image encoders and large language models","volume-title":"International conference on machine learning. PMLR","author":"Li"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/UR61395.2024.10597464"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340734"},{"key":"ref12","volume-title":"Vialm: A survey and benchmark of visually impaired assistance with large models","author":"Zhao","year":"2024"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01370"},{"key":"ref14","article-title":"When and why vision-language models behave like bags-of-words, and what to do about it?","author":"Yuksekgonul","year":"2023","journal-title":"ICLR"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00914"},{"key":"ref16","volume-title":"Gemini","year":"2023"},{"key":"ref17","volume-title":"Gpt-4 technical report","year":"2024"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3522757"},{"key":"ref19","volume-title":"Evidence justifying a clear width for a wayfinding path that is enabling for persons with a guide dog","author":"MacLennan","year":"2015"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3184025"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161289"},{"key":"ref22","volume-title":"Ultralytics YOLO","author":"Jocher","year":"2023"},{"key":"ref23","author":"Yang","year":"2024","journal-title":"Depth anything v2"},{"key":"ref24","article-title":"Visual instruction tuning","volume-title":"NeurIPS","author":"Liu"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.222"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.818"},{"key":"ref27","article-title":"Judging llm-as-a-judge with mt-bench and chatbot arena","author":"Zheng","year":"2023","journal-title":"NeurIPS Datasets and Benchmarks Track"},{"key":"ref28","first-page":"65","article-title":"Meteor: An automatic metric for mt evaluation with improved correlation with human judgments","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization","author":"Banerjee"},{"key":"ref29","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin","year":"2004","journal-title":"Text summarization branches out"},{"key":"ref30","article-title":"Bertscore: Evaluating text generation with bert","author":"Zhang","year":"2019","journal-title":"arXiv preprint"},{"key":"ref31","volume-title":"Llava-onevision: Easy visual task transfer","author":"Li","year":"2024"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.02484"},{"key":"ref33","author":"Li","year":"2024","journal-title":"Llava-next: Stronger llms supercharge multimodal capabilities in the wild"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128120.pdf?arnumber=11128120","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T21:16:58Z","timestamp":1769721418000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128120\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128120","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}