{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,14]],"date-time":"2025-12-14T16:15:06Z","timestamp":1765728906350,"version":"3.37.3"},"reference-count":30,"publisher":"Informa UK Limited","issue":"18","funder":[{"DOI":"10.13039\/501100020962","name":"ACT-X","doi-asserted-by":"publisher","award":["JPMJAX20A5"],"award-info":[{"award-number":["JPMJAX20A5"]}],"id":[{"id":"10.13039\/501100020962","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["Advanced Robotics"],"published-print":{"date-parts":[[2024,9,16]]},"DOI":"10.1080\/01691864.2024.2393409","type":"journal-article","created":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T19:33:00Z","timestamp":1724182380000},"page":"1307-1317","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":3,"title":["Reflex-based open-vocabulary navigation without prior knowledge using omnidirectional camera and multiple vision-language models"],"prefix":"10.1080","volume":"38","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7464-7187","authenticated-orcid":false,"given":"Kento","family":"Kawaharazuka","sequence":"first","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1429-4401","authenticated-orcid":false,"given":"Yoshiki","family":"Obinata","sequence":"additional","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]},{"given":"Naoaki","family":"Kanazawa","sequence":"additional","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]},{"given":"Naoto","family":"Tsukamoto","sequence":"additional","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]},{"given":"Kei","family":"Okada","sequence":"additional","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]},{"given":"Masayuki","family":"Inaba","sequence":"additional","affiliation":[{"name":"The Department of Mechano-Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan"}]}],"member":"301","published-online":{"date-parts":[[2024,8,20]]},"reference":[{"key":"e_1_3_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3177853"},{"key":"e_1_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MITS.2010.939925"},{"key":"e_1_3_2_4_1","doi-asserted-by":"publisher","DOI":"10.26599\/TST.2021.9010012"},{"key":"e_1_3_2_5_1","doi-asserted-by":"crossref","unstructured":"Das A Datta S Gkioxari G et al. Embodied question answering. In: Proceedings of the 2018 IEEE\/CVF International Conference on Computer Vision and Pattern Recognition;\u00a0Salt Lake City USA; 2018.","DOI":"10.1109\/CVPR.2018.00008"},{"key":"e_1_3_2_6_1","unstructured":"Shah D Osinski B Ichter B et al. Robotic navigation with large pre-trained models of language vision and action. In: Proceedings of the 2022 Conference on Robot Learning; Auckland New Zealand; 2022."},{"key":"e_1_3_2_7_1","unstructured":"Shafiullah NMM Paxton C Pinto L et al. CLIP-fields: weakly supervised semantic fields for robotic memory. In: Proceedings of the Robotics: Science and Systems 2023;\u00a0Delft Netherlands; 2023. p. 1\u201311."},{"key":"e_1_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160969"},{"key":"e_1_3_2_9_1","doi-asserted-by":"crossref","unstructured":"Gadre SY Wortsman M Ilharco G et al. Cows on pasture: baselines and benchmarks for language-driven zero-shot object navigation. In: Proceedings of the 2023 IEEE\/CVF International Conference on Computer Vision and Pattern Recognition; Vancouver Canada; 2023. p. 23171\u201323181.","DOI":"10.1109\/CVPR52729.2023.02219"},{"key":"e_1_3_2_10_1","doi-asserted-by":"crossref","unstructured":"Jatavallabhula KM Kuwajerwala A Gu Q et al. ConceptFusion: open-set multimodal 3D mapping. In: Proceedings of the 2020 Robotics: Science and Systems; Auckland New Zealand; 2023.","DOI":"10.15607\/RSS.2023.XIX.066"},{"key":"e_1_3_2_11_1","unstructured":"Kobilarov M Sukhatme G Hyams J et al. People tracking and following with mobile robot using an omnidirectional camera and a laser. In: Proceedings of the 2006 IEEE International Conference on Robotics and Automation;\u00a0Orlando USA; 2006. p. 557\u2013562."},{"key":"e_1_3_2_12_1","doi-asserted-by":"crossref","unstructured":"Markovi\u0107 I Chaumette F Petrovi\u0107 I. Moving object detection tracking and following using an omnidirectional camera on a mobile robot. In: Proceedings of the 2014 IEEE International Conference on Robotics and Automation; Hong Kong China; 2014. p. 5630\u20135635.","DOI":"10.1109\/ICRA.2014.6907687"},{"key":"e_1_3_2_13_1","doi-asserted-by":"crossref","unstructured":"Rituerto A Puig L Guerrero JJ. Visual SLAM with an omnidirectional camera. In: Proceedings of the 20th International Conference on Pattern Recognition;\u00a0Istanbul Turkey; 2010. p. 348\u2013351.","DOI":"10.1109\/ICPR.2010.94"},{"key":"e_1_3_2_14_1","doi-asserted-by":"crossref","unstructured":"Winters N Gaspar J Lacey G et al. Omni-directional vision for robot navigation. In: Proceedings of the IEEE Workshop on Omnidirectional Vision;\u00a0Hilton Head Island USA; 2000. p. 21\u201328.","DOI":"10.1109\/OMNVIS.2000.853799"},{"key":"e_1_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-013-9342-3"},{"key":"e_1_3_2_16_1","unstructured":"Radford A Kim JW Hallacy C et\u00a0al. Learning transferable visual models from natural language supervision. In: Proceedings of the 38th International Conference on Machine Learning; Online; 2021. p. 8748\u20138763."},{"key":"e_1_3_2_17_1","doi-asserted-by":"crossref","unstructured":"Zhou X Girdhar R Joulin A et al. Detecting twenty-thousand classes using image-level supervision. In: Proceedings of the 2022 European Conference on Computer Vision;\u00a0Istanbul Turkey; 2022.","DOI":"10.1007\/978-3-031-20077-9_21"},{"key":"e_1_3_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/JRA.1986.1087032"},{"key":"e_1_3_2_19_1","doi-asserted-by":"crossref","unstructured":"Rosenblatt JK Payton DW. A fine-grained alternative to the subsumption architecture for mobile robot control. In: Proceedings of the 1989 International Joint Conference on Neural Networks;\u00a0San Diego USA; 1989. p. 317\u2013323.","DOI":"10.1109\/IJCNN.1989.118717"},{"key":"e_1_3_2_20_1","doi-asserted-by":"crossref","unstructured":"Duchon AP Warren WH. Robot navigation from a Gibsonian viewpoint. In: Proceedings of the IEEE International Conference on Systems Man and Cybernetics. Vol. 3; San Antonio USA; 1994. p. 2272\u20132277.","DOI":"10.1109\/ICSMC.1994.400203"},{"key":"e_1_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/0921-8890(94)90020-5"},{"key":"e_1_3_2_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0921-8890(00)00124-X"},{"key":"e_1_3_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.8860"},{"key":"e_1_3_2_24_1","doi-asserted-by":"crossref","unstructured":"Ho T Budagavi M. Dual-fisheye lens stitching for 360-degree imaging. In: Proceedings of the 2017 IEEE International Conference on Acoustics Speech and Signal Processing;\u00a0New Orleans USA; 2017. p. 2172\u20132176.","DOI":"10.1109\/ICASSP.2017.7952541"},{"key":"e_1_3_2_25_1","unstructured":"Li F Zhang H Zhang Y et\u00a0al. Vision-language intelligence: tasks representation learning and large models. arXiv preprint arXiv:2203.01922. 2022."},{"key":"e_1_3_2_26_1","unstructured":"Wang P Yang A Men R et al. OFA: unifying architectures tasks and modalities through a simple sequence-to-sequence learning framework. In: Proceedings of the 39th International Conference on Machine Learning;\u00a0Baltimore USA; 2022. p. 23318\u201323340."},{"key":"e_1_3_2_27_1","unstructured":"Li J Li D Savarese S et al. BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: Proceedings of the 40th International Conference on Machine Learning; Honolulu USA; 2023. p. 19730\u201319742."},{"key":"e_1_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"e_1_3_2_29_1","unstructured":"Gu X Lin T Kuo W et\u00a0al. Open-vocabulary object detection via vision and language knowledge distillation. In: Proceedings of the 10th International Conference on Learning Representations; Online; 2022. p. 1\u201320."},{"key":"e_1_3_2_30_1","doi-asserted-by":"crossref","unstructured":"Gupta A Dollar P Girshick R. LVIS: a dataset for large vocabulary instance segmentation. In: Proceedings of the 2019 IEEE\/CVF International Conference on Computer Vision and Pattern Recognition;\u00a0Long Beach USA; 2019.","DOI":"10.1109\/CVPR.2019.00550"},{"key":"e_1_3_2_31_1","doi-asserted-by":"crossref","unstructured":"Reimers N Gurevych I. Sentence-BERT: sentence embeddings using siamese BERT-networks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing;\u00a0Hong Kong China; 2019.","DOI":"10.18653\/v1\/D19-1410"}],"container-title":["Advanced Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/01691864.2024.2393409","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,17]],"date-time":"2024-10-17T17:39:14Z","timestamp":1729186754000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/01691864.2024.2393409"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,20]]},"references-count":30,"journal-issue":{"issue":"18","published-print":{"date-parts":[[2024,9,16]]}},"alternative-id":["10.1080\/01691864.2024.2393409"],"URL":"https:\/\/doi.org\/10.1080\/01691864.2024.2393409","relation":{},"ISSN":["0169-1864","1568-5535"],"issn-type":[{"type":"print","value":"0169-1864"},{"type":"electronic","value":"1568-5535"}],"subject":[],"published":{"date-parts":[[2024,8,20]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tadr20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2023-10-27","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-06-23","order":1,"name":"revised","label":"Revised","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-08-08","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-08-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}