{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:04:43Z","timestamp":1765339483479,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","funder":[{"name":"the Science and Technology Development Fund of Macau","award":["[0122\/2024\/RIB2, 0215\/2024\/AGJ, 001\/2024\/SKL]"],"award-info":[{"award-number":["[0122\/2024\/RIB2, 0215\/2024\/AGJ, 001\/2024\/SKL]"]}]},{"name":"the Research Services and Knowledge Transfer Office, University of Macau","award":["[SRG2023-00037-IOTSC, MYRG-GRG2024-00284-IOTSC]"],"award-info":[{"award-number":["[SRG2023-00037-IOTSC, MYRG-GRG2024-00284-IOTSC]"]}]},{"name":"the Shenzhen-Hong Kong-Macau Science and Technology Program Category C","award":["[SGDX20230821095159012]"],"award-info":[{"award-number":["[SGDX20230821095159012]"]}]},{"name":"the State Key Lab of Intelligent Transportation System","award":["[2024-B001]"],"award-info":[{"award-number":["[2024-B001]"]}]},{"name":"the Jiangsu Provincial Science and Technology Program","award":["[BZ2024055]"],"award-info":[{"award-number":["[BZ2024055]"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755378","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:15Z","timestamp":1761375255000},"page":"12054-12063","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Eyes on the Road, Mind Beyond Vision: Context-Aware Multi-modal Enhanced Risk Anticipation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3234-2569","authenticated-orcid":false,"given":"Jiaxun","family":"Zhang","sequence":"first","affiliation":[{"name":"State Key Lab of IoT for Smart City, University of Macau, Macau SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0207-5087","authenticated-orcid":false,"given":"Haicheng","family":"Liao","sequence":"additional","affiliation":[{"name":"State Key Lab of IoT for Smart City, University of Macau, Macau SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1495-5762","authenticated-orcid":false,"given":"Yumu","family":"Xie","sequence":"additional","affiliation":[{"name":"State Key Lab of IoT for Smart City, University of Macau, Macau SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7707-1505","authenticated-orcid":false,"given":"Chengyue","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Lab of IoT for Smart City, University of Macau, Macau SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7839-1976","authenticated-orcid":false,"given":"Yanchen","family":"Guan","sequence":"additional","affiliation":[{"name":"State Key Lab of IoT for Smart City, University of Macau, Macau SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4907-914X","authenticated-orcid":false,"given":"Bin","family":"Rao","sequence":"additional","affiliation":[{"name":"State Key Lab of IoT for Smart City, University of Macau, Macau SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0877-6829","authenticated-orcid":false,"given":"Zhenning","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Lab of IoT for Smart City, University of Macao, Macau SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tre.2024.103563"},{"key":"e_1_3_2_1_2_1","volume-title":"Spatio-temporal Analysis of Dashboard Camera Videos for Time-To-Accident Forecasting. In 2023 International Joint Conference on Neural Networks (IJCNN). IEEE, 1-8.","author":"Anjum Taif","year":"2023","unstructured":"Taif Anjum, Daya Kumar, and Apurva Narayan. 2023. Spatio-temporal Analysis of Dashboard Camera Videos for Time-To-Accident Forecasting. In 2023 International Joint Conference on Neural Networks (IJCNN). IEEE, 1-8."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3390\/wevj16020082"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413827"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00752"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2022.106923"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2024.107836"},{"key":"e_1_3_2_1_8_1","volume-title":"GACNet: Interactive Prediction of Surrounding Vehicles Behavior under High Collision Risk. Advanced Intelligent Systems","author":"Chai Jingzheng","year":"2025","unstructured":"Jingzheng Chai, Jianting Liu, Jingluo Huang, and Chunyan Huang. 2025. GACNet: Interactive Prediction of Surrounding Vehicles Behavior under High Collision Risk. Advanced Intelligent Systems (2025), 2401040."},{"key":"e_1_3_2_1_9_1","first-page":"136","volume-title":"Taipei","author":"Chan Fu-Hsiang","year":"2017","unstructured":"Fu-Hsiang Chan, Yu-Ting Chen, Yu Xiang, and Min Sun. 2017. Anticipating accidents in dashcam videos. In Computer Vision-ACCV 2016: 13th Asian Conference on Computer Vision, Taipei, Taiwan, November 20-24, 2016, Revised Selected Papers, Part IV 13. Springer, 136-153."},{"volume-title":"Driving Green Transportation System Through Artificial Intelligence and Automation: Approaches, Technologies and Applications","author":"Choudhury Nobhonil Roy","key":"e_1_3_2_1_10_1","unstructured":"Nobhonil Roy Choudhury, Sreeja Bhattacharjee, Saptarsi Ghosh, Shivnath Ghosh, and Pranashi Chakraborty. 2025. Intelligent Traffic Management and Accident Prevention System with Vehicle Counting and Distance-Based Brake Control. In Driving Green Transportation System Through Artificial Intelligence and Automation: Approaches, Technologies and Applications. Springer, 171-201."},{"key":"e_1_3_2_1_11_1","first-page":"111","article-title":"Flamingo: A lightweight visual-language model for real-time applications","volume":"58","author":"Chowdhury N.","year":"2023","unstructured":"N. Chowdhury, R. Patel, and V. Kumar. 2023. Flamingo: A lightweight visual-language model for real-time applications. Journal of Artificial Intelligence Research, Vol. 58, 2 (2023), 111-129.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2024.109611"},{"key":"e_1_3_2_1_13_1","volume-title":"DADA: Driver attention prediction in driving accident scenarios","author":"Fang Jianwu","year":"2021","unstructured":"Jianwu Fang, Dingxin Yan, Jiahuan Qiao, Jianru Xue, and Hongkai Yu. 2021. DADA: Driver attention prediction in driving accident scenarios. IEEE transactions on intelligent transportation systems, Vol. 23, 6 (2021), 4959-4971."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ress.2025.110804"},{"volume-title":"VLM Q-Learning: Aligning Vision-Language Models for Interactive Decision-Making","author":"Grigsby Jake","key":"e_1_3_2_1_15_1","unstructured":"Jake Grigsby, Yuke Zhu, Michael S Ryoo, and Juan Carlos Niebles. [n.d.]. VLM Q-Learning: Aligning Vision-Language Models for Interactive Decision-Making. In Scaling Self-Improving Foundation Models without Human Supervision."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2025.3534233"},{"volume-title":"VLM-Auto: VLM-based Autonomous Driving Assistant with Human-like Behavior and Understanding for Complex Road Scenes. In 2024 2nd International Conference on Foundation and Large Language Models (FLLM)","author":"Guo Ziang","key":"e_1_3_2_1_17_1","unstructured":"Ziang Guo, Zakhar Yagudin, Artem Lykov, Mikhail Konenkov, and Dzmitry Tsetserukou. 2024. VLM-Auto: VLM-based Autonomous Driving Assistant with Human-like Behavior and Understanding for Complex Road Scenes. In 2024 2nd International Conference on Foundation and Large Language Models (FLLM). IEEE, 501-507."},{"key":"e_1_3_2_1_18_1","volume-title":"Revisiting Birds Eye View Perception Models with Frozen Foundation Models: DINOv2 and Metric3Dv2. arXiv preprint arXiv:2501.08118","author":"Hayes Seamie","year":"2025","unstructured":"Seamie Hayes, Ganesh Sistu, and Ciaran Eising. 2025. Revisiting Birds Eye View Perception Models with Frozen Foundation Models: DINOv2 and Metric3Dv2. arXiv preprint arXiv:2501.08118 (2025)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01353"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.128743"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trf.2024.11.022"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/IV55156.2024.10588373"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3155613"},{"key":"e_1_3_2_1_24_1","first-page":"29914","article-title":"Segment anything in high quality","volume":"36","author":"Ke Lei","year":"2023","unstructured":"Lei Ke, Mingqiao Ye, Martin Danelljan, Yu-Wing Tai, Chi-Keung Tang, Fisher Yu, et al., 2023. Segment anything in high quality. Advances in Neural Information Processing Systems, Vol. 36 (2023), 29914-29934.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trf.2025.02.003"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW65960.2025.00127"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijhcs.2024.103366"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/MITS.2024.3378460"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2024.109821"},{"key":"e_1_3_2_1_30_1","volume-title":"Cot-drive: Efficient motion forecasting for autonomous driving with llms and chain-of-thought prompting. arXiv preprint arXiv:2503.07234","author":"Liao Haicheng","year":"2025","unstructured":"Haicheng Liao, Hanlin Kong, Bonan Wang, Chengyue Wang, Wang Ye, Zhengbing He, Chengzhong Xu, and Zhenning Li. 2025a. Cot-drive: Efficient motion forecasting for autonomous driving with llms and chain-of-thought prompting. arXiv preprint arXiv:2503.07234 (2025)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2024.107760"},{"key":"e_1_3_2_1_32_1","volume-title":"Shengbo Eben Li, and Chengzhong Xu","author":"Liao Haicheng","year":"2024","unstructured":"Haicheng Liao, Yongkang Li, Zhenning Li, Chengyue Wang, Zhiyong Cui, Shengbo Eben Li, and Chengzhong Xu. 2024b. A Cognitive-Based Trajectory Prediction Approach for Autonomous Driving. IEEE Transactions on Intelligent Vehicles (2024)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681326"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.commtr.2023.100116"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680672"},{"key":"e_1_3_2_1_36_1","volume-title":"Chengzhong Xu, and Zhenning Li.","author":"Liao Haicheng","year":"2025","unstructured":"Haicheng Liao, Chengyue Wang, Kaiqun Zhu, Yilong Ren, Bolin Gao, Shengbo Eben Li, Chengzhong Xu, and Zhenning Li. 2025b. Minds on the move: Decoding trajectory prediction in autonomous driving with cognitive insights. IEEE Transactions on Intelligent Transportation Systems (2025)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2024.107520"},{"key":"e_1_3_2_1_38_1","volume-title":"VLM-Guard: Safeguarding Vision-Language Models via Fulfilling Safety Alignment Gap. arXiv preprint arXiv:2502.10486","author":"Liu Qin","year":"2025","unstructured":"Qin Liu, Fei Wang, Chaowei Xiao, and Muhao Chen. 2025. VLM-Guard: Safeguarding Vision-Language Models via Fulfilling Safety Alignment Gap. arXiv preprint arXiv:2502.10486 (2025)."},{"key":"e_1_3_2_1_39_1","volume-title":"European Conference on Computer Vision. Springer, 38-55","author":"Liu Shilong","year":"2024","unstructured":"Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Qing Jiang, Chunyuan Li, Jianwei Yang, Hang Su, et al., 2024. Grounding dino: Marrying dino with grounded pre-training for open-set object detection. In European Conference on Computer Vision. Springer, 38-55."},{"key":"e_1_3_2_1_40_1","volume-title":"International Conference on Machine Learning. PMLR, 14743-14752","author":"Mahmoudieh Parsa","year":"2022","unstructured":"Parsa Mahmoudieh, Deepak Pathak, and Trevor Darrell. 2022. Zero-shot reward specification via grounded natural language. In International Conference on Machine Learning. PMLR, 14743-14752."},{"key":"e_1_3_2_1_41_1","volume-title":"Ruhul Amin Khalil, and Nasir Saeed","author":"Mahmud Doaa","year":"2025","unstructured":"Doaa Mahmud, Hadeel Hajmohamed, Shamma Almentheri, Shamma Alqaydi, Lameya Aldhaheri, Ruhul Amin Khalil, and Nasir Saeed. 2025. Integrating LLMs with ITS: Recent Advances, Potentials, Challenges, and Future Directions. IEEE Transactions on Intelligent Transportation Systems (2025)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00331"},{"key":"e_1_3_2_1_43_1","volume-title":"Towards explainable artificial intelligence (XAI) for early anticipation of traffic accidents. arXiv e-prints","author":"Karim Muhammad Monjurul","year":"2021","unstructured":"Muhammad Monjurul Karim, Yu Li, and Ruwen Qin. 2021. Towards explainable artificial intelligence (XAI) for early anticipation of traffic accidents. arXiv e-prints (2021), arXiv-2108."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/OJITS.2025.3554387"},{"key":"e_1_3_2_1_45_1","unstructured":"National Highway Traffic Safety Administration. 2023. NHTSA Early Estimates: 2022 Traffic Crash Deaths. https:\/\/www.nhtsa.gov\/press-releases\/traffic-crash-death-estimates-2022. Accessed: 2025-03-04."},{"volume-title":"Global status report on road safety 2023: summary","author":"World Health Organization","key":"e_1_3_2_1_46_1","unstructured":"World Health Organization. 2023. Global status report on road safety 2023: summary. World Health Organization."},{"key":"e_1_3_2_1_47_1","volume-title":"Lightweight accident detection model for autonomous fleets based on GPS data. Transportation research procedia","author":"Papadopoulos Alexandros","year":"2024","unstructured":"Alexandros Papadopoulos, Athanasios Sersemis, Georgios Spanos, Antonios Lalas, Christos Liaskos, Konstantinos Votis, and Dimitrios Tzovaras. 2024. Lightweight accident detection model for autonomous fleets based on GPS data. Transportation research procedia, Vol. 78 (2024), 16-23."},{"key":"e_1_3_2_1_48_1","first-page":"8748","volume-title":"Learning Transferable Visual Models From Natural Language Supervision. Proceedings of the International Conference on Machine Learning (ICML)","volume":"139","author":"Radford A.","year":"2021","unstructured":"A. Radford, J. W. Kim, C. Hallacy, A. Ramesh, G. Goh, S. Agarwal, I. Sutskever, T. Salimans, and D. Amodei. 2021. Learning Transferable Visual Models From Natural Language Supervision. Proceedings of the International Conference on Machine Learning (ICML), Vol. 139 (2021), 8748-8763. https:\/\/arxiv.org\/abs\/2103.00020"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW65960.2025.00119"},{"key":"e_1_3_2_1_50_1","unstructured":"Xiao Teng Zhenjiang Shen Lin Huang Hui Li and Wankai Li. [n.d.]. Improving Intelligent Perception and Decision Optimization of Pedestrian Crossing Scenarios in Autonomous Driving Environments Through Large Visual Language Models. Available at SSRN 5112947 ( [n. d.])."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/MIM.2024.10505198"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.3390\/app14177455"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3257169"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.trf.2024.12.010"},{"key":"e_1_3_2_1_55_1","volume-title":"Driving factors and spatio-temporal evolution of tourist accident clusters in Chinese outbound tourism. Current Issues in Tourism","author":"Xie Chaowu","year":"2025","unstructured":"Chaowu Xie, Kun Zhang, Jiangchi Zhang, and Songshan Huang. 2025. Driving factors and spatio-temporal evolution of tourist accident clusters in Chinese outbound tourism. Current Issues in Tourism (2025), 1-19."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967556"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.146"},{"key":"e_1_3_2_1_58_1","volume-title":"LATTE: Lightweight Attention-based Traffic Accident Anticipation Engine. arXiv:2504.04103 [cs.CE] https:\/\/arxiv.org\/abs\/2504.04103","author":"Zhang Jiaxun","year":"2025","unstructured":"Jiaxun Zhang, Yanchen Guan, Chengyue Wang, Haicheng Liao, Guohui Zhang, and Zhenning Li. 2025. LATTE: Lightweight Attention-based Traffic Accident Anticipation Engine. arXiv:2504.04103 [cs.CE] https:\/\/arxiv.org\/abs\/2504.04103"},{"key":"e_1_3_2_1_59_1","first-page":"11","article-title":"Traffic Accident Prediction Method Based on Local Relational Features and Attention Mechanisms","volume":"45","author":"Yahui ZHANG","year":"2025","unstructured":"Yahui ZHANG, Ying LI, and Tianen LIU. 2025. Traffic Accident Prediction Method Based on Local Relational Features and Attention Mechanisms. Transactions of Beijing institute of Technology, Vol. 45, 1 (2025), 11-18.","journal-title":"Transactions of Beijing institute of Technology"},{"key":"e_1_3_2_1_60_1","first-page":"45137","article-title":"Tame a wild camera: in-the-wild monocular camera calibration","volume":"36","author":"Zhu Shengjie","year":"2023","unstructured":"Shengjie Zhu, Abhinav Kumar, Masa Hu, and Xiaoming Liu. 2023. Tame a wild camera: in-the-wild monocular camera calibration. Advances in Neural Information Processing Systems, Vol. 36 (2023), 45137-45149.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755378","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:00:59Z","timestamp":1765339259000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755378"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":60,"alternative-id":["10.1145\/3746027.3755378","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755378","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}