{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:26:48Z","timestamp":1765308408649,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","funder":[{"name":"Institute of Information & Communications Technology Planning & Evaluation","award":["RS-2024-00439020, RS-2025-02283048"],"award-info":[{"award-number":["RS-2024-00439020, RS-2025-02283048"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755074","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:50:47Z","timestamp":1761371447000},"page":"3399-3407","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["B4DL: A Benchmark for 4D LiDAR LLM in Spatio-Temporal Understanding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-6670-7133","authenticated-orcid":false,"given":"Changho","family":"Choi","sequence":"first","affiliation":[{"name":"Korea Advanced Institute of Science and Technology, Daejeon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9608-9547","authenticated-orcid":false,"given":"Youngwoo","family":"Shin","sequence":"additional","affiliation":[{"name":"Korea Advanced Institute of Science and Technology, Daejeon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7905-0045","authenticated-orcid":false,"given":"Gyojin","family":"Han","sequence":"additional","affiliation":[{"name":"Korea Advanced Institute of Science and Technology, Daejeon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1800-5878","authenticated-orcid":false,"given":"Dong-Jae","family":"Lee","sequence":"additional","affiliation":[{"name":"Korea Advanced Institute of Science and Technology, Daejeon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7174-7932","authenticated-orcid":false,"given":"Junmo","family":"Kim","sequence":"additional","affiliation":[{"name":"Korea Advanced Institute of Science and Technology, Daejeon, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65-72","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65-72."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"e_1_3_2_1_5_1","first-page":"4171","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 4171-4186."},{"key":"e_1_3_2_1_6_1","volume-title":"Vision meets robotics: The kitti dataset. The international journal of robotics research","author":"Geiger Andreas","year":"2013","unstructured":"Andreas Geiger, Philip Lenz, Christoph Stiller, and Raquel Urtasun. 2013. Vision meets robotics: The kitti dataset. The international journal of robotics research, Vol. 32, 11 (2013), 1231-1237."},{"key":"e_1_3_2_1_7_1","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Alex Vaughan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00217"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00727"},{"key":"e_1_3_2_1_10_1","first-page":"20482","article-title":"3d-llm: Injecting the 3d world into large language models","volume":"36","author":"Hong Yining","year":"2023","unstructured":"Yining Hong, Haoyu Zhen, Peihao Chen, Shuhong Zheng, Yilun Du, Zhenfang Chen, and Chuang Gan. 2023. 3d-llm: Injecting the 3d world into large language models. Advances in Neural Information Processing Systems, Vol. 36 (2023), 20482-20494.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_11_1","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume":"1","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, Weizhu Chen, et al., 2022. Lora: Low-rank adaptation of large language models. ICLR, Vol. 1, 2 (2022), 3.","journal-title":"ICLR"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01353"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i21.30570"},{"key":"e_1_3_2_1_14_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74-81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74-81."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02030"},{"key":"e_1_3_2_1_16_1","volume-title":"Video-chatgpt: Towards detailed video understanding via large vision and language models. arXiv preprint arXiv:2306.05424","author":"Maaz Muhammad","year":"2023","unstructured":"Muhammad Maaz, Hanoona Rasheed, Salman Khan, and Fahad Shahbaz Khan. 2023. Video-chatgpt: Towards detailed video understanding via large vision and language models. arXiv preprint arXiv:2306.05424 (2023)."},{"key":"e_1_3_2_1_17_1","unstructured":"Jiageng Mao Minzhe Niu Chenhan Jiang Hanxue Liang Jingheng Chen Xiaodan Liang Yamin Li Chaoqiang Ye Wei Zhang Zhenguo Li et al. 2021. One million scenes for autonomous driving: Once dataset. arXiv preprint arXiv:2106.11037 (2021)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72980-5_15"},{"key":"e_1_3_2_1_19_1","first-page":"3819","article-title":"K-radar: 4d radar object detection for autonomous driving in various weather conditions","volume":"35","author":"Paek Dong-Hee","year":"2022","unstructured":"Dong-Hee Paek, Seung-Hyun Kong, and Kevin Tirta Wijaya. 2022. K-radar: 4d radar object detection for autonomous driving in various weather conditions. Advances in Neural Information Processing Systems, Vol. 35 (2022), 3819-3829.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311-318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311-318."},{"key":"e_1_3_2_1_21_1","volume-title":"NuScenes-QA: A Multi-modal Visual Question Answering Benchmark for Autonomous Driving Scenario. arXiv preprint arXiv:2305.14836","author":"Qian Tianwen","year":"2023","unstructured":"Tianwen Qian, Jingjing Chen, Linhai Zhuo, Yang Jiao, and Yu-Gang Jiang. 2023. NuScenes-QA: A Multi-modal Visual Question Answering Benchmark for Autonomous Driving Scenario. arXiv preprint arXiv:2305.14836 (2023)."},{"key":"e_1_3_2_1_22_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_23_1","volume-title":"DriveLM: Driving with Graph Visual Question Answering. arXiv preprint arXiv:2312.14150","author":"Sima Chonghao","year":"2023","unstructured":"Chonghao Sima, Katrin Renz, Kashyap Chitta, Li Chen, Hanxue Zhang, Chengen Xie, Ping Luo, Andreas Geiger, and Hongyang Li. 2023. DriveLM: Driving with Graph Visual Question Answering. arXiv preprint arXiv:2312.14150 (2023)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"e_1_3_2_1_25_1","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew M Dai Anja Hauth Katie Millican et al. 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)."},{"key":"e_1_3_2_1_26_1","volume-title":"Drivegpt4: Interpretable end-to-end autonomous driving via large language model","author":"Xu Zhenhua","year":"2024","unstructured":"Zhenhua Xu, Yujia Zhang, Enze Xie, Zhen Zhao, Yong Guo, Kwan-Yee K Wong, Zhenguo Li, and Hengshuang Zhao. 2024. Drivegpt4: Interpretable end-to-end autonomous driving via large language model. IEEE Robotics and Automation Letters (2024)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02558"},{"key":"e_1_3_2_1_28_1","volume-title":"Lidar-llm: Exploring the potential of large language models for 3d lidar understanding. arXiv preprint arXiv:2312.14074","author":"Yang Senqiao","year":"2023","unstructured":"Senqiao Yang, Jiaming Liu, Ray Zhang, Mingjie Pan, Zoey Guo, Xiaoqi Li, Zehui Chen, Peng Gao, Yandong Guo, and Shanghang Zhang. 2023. Lidar-llm: Exploring the potential of large language models for 3d lidar understanding. arXiv preprint arXiv:2312.14074 (2023)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00836"},{"key":"e_1_3_2_1_30_1","first-page":"46595","article-title":"Judging llm-as-a-judge with mt-bench and chatbot arena","volume":"36","author":"Zheng Lianmin","year":"2023","unstructured":"Lianmin Zheng, Wei-Lin Chiang, Ying Sheng, Siyuan Zhuang, Zhanghao Wu, Yonghao Zhuang, Zi Lin, Zhuohan Li, Dacheng Li, Eric Xing, et al., 2023. Judging llm-as-a-judge with mt-bench and chatbot arena. Advances in Neural Information Processing Systems, Vol. 36 (2023), 46595-46623.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00249"},{"key":"e_1_3_2_1_32_1","volume-title":"Proceedings of the 42nd International Conference on Machine Learning (ICML).","author":"Zhu Yuchang","year":"2025","unstructured":"Yuchang Zhu, Huizhe Zhang, Bingzhe Wu, Jintang Li, Zibin Zheng, Peilin Zhao, Liang Chen, and Yatao Bian. 2025. Measuring Diversity in Synthetic Datasets. In Proceedings of the 42nd International Conference on Machine Learning (ICML)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755074","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:23:13Z","timestamp":1765308193000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755074"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":32,"alternative-id":["10.1145\/3746027.3755074","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755074","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}