{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T19:44:43Z","timestamp":1754163883494,"version":"3.41.2"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3733566.3734430","type":"proceedings-article","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T09:09:47Z","timestamp":1750669787000},"page":"32-37","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Integrated Multimodal Interaction: Merging Immersive 3D Worlds with Language Based Retrieval for 3D Scene Understanding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-5885-3341","authenticated-orcid":false,"given":"Shawn","family":"Bowser","sequence":"first","affiliation":[{"name":"DEVCOM Army Research Laboratory, Los Angeles, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1383-8120","authenticated-orcid":false,"given":"Cynthia","family":"Matuszek","sequence":"additional","affiliation":[{"name":"University of Maryland, Baltimore, County Baltimore, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8761-167X","authenticated-orcid":false,"given":"Stephanie","family":"Lukin","sequence":"additional","affiliation":[{"name":"DEVCOM Army Research Laboratory, Los Angeles, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1186\/s41018-018-0045-4"},{"key":"e_1_3_2_1_2_1","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang Humen Zhong Yuanzhi Zhu Mingkun Yang Zhaohai Li Jianqiang Wan Pengfei Wang Wei Ding Zheren Fu Yiheng Xu Jiabo Ye Xi Zhang Tianbao Xie Zesen Cheng Hang Zhang Zhibo Yang Haiyang Xu and Junyang Lin. 2025. Qwen2.5-VL Technical Report. arXiv:2502.13923 [cs.CV] https:\/\/arxiv.org\/abs\/2502.13923"},{"key":"e_1_3_2_1_3_1","volume-title":"The 1st InterAI Workshop: Interactive AI for Human-centered Robotics.","author":"Bowser Shawn","year":"2024","unstructured":"Shawn Bowser and Stephanie M Lukin. 2024. 3D Gaussian Splatting for Human-Robot Interaction. In The 1st InterAI Workshop: Interactive AI for Human-centered Robotics."},{"key":"e_1_3_2_1_4_1","volume-title":"CERC2017","author":"Brenner Sven","year":"2017","unstructured":"Sven Brenner, Sebastian Gelfert, and Hendrik Rust. 2017. New approach in 3d mapping and localization for search and rescue missions. CERC2017 (2017), 105."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02496"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 1--10","author":"Das Abhishek","year":"2018","unstructured":"Abhishek Das, Samyak Datta, Georgia Gkioxari, Stefan Lee, Devi Parikh, and Dhruv Batra. 2018. Embodied question answering. In Proceedings of the IEEE conference on computer vision and pattern recognition. 1--10."},{"key":"e_1_3_2_1_7_1","volume-title":"S-eqa: Tackling situational queries in embodied question answering. arXiv preprint arXiv:2405.04732","author":"Dorbala Vishnu Sashank","year":"2024","unstructured":"Vishnu Sashank Dorbala, Prasoon Goyal, Robinson Piramuthu, Michael Johnston, Reza Ghanadhan, and Dinesh Manocha. 2024. S-eqa: Tackling situational queries in embodied question answering. arXiv preprint arXiv:2405.04732 (2024)."},{"key":"e_1_3_2_1_8_1","volume-title":"FLAME 3 Dataset: Unleashing the Power of Radiometric Thermal UAV Imagery for Wildfire Management. arXiv preprint arXiv:2412.02831","author":"Hopkins Bryce","year":"2024","unstructured":"Bryce Hopkins, Leo ONeill, Michael Marinaccio, Eric Rowell, Russell Parsons, Sarah Flanary, Irtija Nazim, Carl Seielstad, and Fatemeh Afghah. 2024. FLAME 3 Dataset: Unleashing the Power of Radiometric Thermal UAV Imagery for Wildfire Management. arXiv preprint arXiv:2412.02831 (2024)."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings IEEE International Conference on Computer Vision (ICCV).","author":"Federico Tombari Matthias Niessner Nassir Navab","year":"2019","unstructured":"Nassir Navab Federico Tombari Matthias Niessner Johanna Wald, Armen Avetisyan. 2019. RIO: 3D Object Instance Re-Localization in Changing Indoor Environments. In Proceedings IEEE International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592433"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2005.06.004"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.futured-1.4"},{"key":"e_1_3_2_1_13_1","volume-title":"Carlos Sanchez Amaro, Matthew Marge, Clare R Voss, and David Traum.","author":"Lukin Stephanie M","year":"2018","unstructured":"Stephanie M Lukin, Felix Gervits, Cory J Hayes, Anton Leuski, Pooja Moolchandani, John G Rogers III, Carlos Sanchez Amaro, Matthew Marge, Clare R Voss, and David Traum. 2018. Scoutbot: a dialogue system for collaborative navigation."},{"key":"e_1_3_2_1_14_1","unstructured":"Stephanie M. Lukin Jaci South and Shawn Bowser. 2024. CHRIS-Bot: A Robot for Dialogue and Scene Understanding of Anomalous Environments in Virtual Reality. Technical Report ARL-TR-9906. DEVCOM Army Research Laboratory."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01560"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02058"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3634981"},{"key":"e_1_3_2_1_19_1","volume-title":"3d-grand: A million-scale dataset for 3d-llms with better grounding and less hallucination. arXiv preprint arXiv:2406.05132","author":"Yang Jianing","year":"2024","unstructured":"Jianing Yang, Xuweiyi Chen, Nikhil Madaan, Madhavan Iyengar, Shengyi Qian, David F Fouhey, and Joyce Chai. 2024. 3d-grand: A million-scale dataset for 3d-llms with better grounding and less hallucination. arXiv preprint arXiv:2406.05132 (2024)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00647"}],"event":{"name":"ICMR '25: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Chicago IL USA","acronym":"ICMR '25"},"container-title":["Proceedings of the 6th Workshop on Intelligent Cross-Data Analysis and Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3733566.3734430","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,31]],"date-time":"2025-07-31T16:19:54Z","timestamp":1753978794000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3733566.3734430"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":20,"alternative-id":["10.1145\/3733566.3734430","10.1145\/3733566"],"URL":"https:\/\/doi.org\/10.1145\/3733566.3734430","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}