{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T18:20:31Z","timestamp":1773771631431,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"name":"University of Macau","award":["SRG2023-00037-IOTSC"],"award-info":[{"award-number":["SRG2023-00037-IOTSC"]}]},{"name":"Science and Technology Development Fund of Macau SAR","award":["0021\/2022\/ITP, 0081\/2022\/A2, 001\/2024\/SKL"],"award-info":[{"award-number":["0021\/2022\/ITP, 0081\/2022\/A2, 001\/2024\/SKL"]}]},{"name":"Shenzhen-Hong Kong-Macau Science and Technology Program Category C","award":["SGDX20230821095159012"],"award-info":[{"award-number":["SGDX20230821095159012"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681326","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:27Z","timestamp":1729925967000},"page":"8-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["When, Where, and What? 
A Benchmark for Accident Anticipation and Localization with Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0207-5087","authenticated-orcid":false,"given":"Haicheng","family":"Liao","sequence":"first","affiliation":[{"name":"University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9936-7786","authenticated-orcid":false,"given":"Yongkang","family":"Li","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7707-1505","authenticated-orcid":false,"given":"Chengyue","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7839-1976","authenticated-orcid":false,"given":"Yanchen","family":"Guan","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5816-6837","authenticated-orcid":false,"given":"Kahou","family":"Tam","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5220-1609","authenticated-orcid":false,"given":"Chunlin","family":"Tian","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2044-8289","authenticated-orcid":false,"given":"Li","family":"Li","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9480-0356","authenticated-orcid":false,"given":"Chengzhong","family":"Xu","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0877-6829","authenticated-orcid":false,"given":"Zhenning","family":"Li","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, 
China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413827"},{"key":"e_1_3_2_2_2_1","volume-title":"Deep Reinforced Accident Anticipation with Visual Explanation. In International Conference on Computer Vision (ICCV).","author":"Bao Wentao","year":"2021","unstructured":"Wentao Bao, Qi Yu, and Yu Kong. 2021. Deep Reinforced Accident Anticipation with Visual Explanation. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2021.106409"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00644"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_2_6_1","volume-title":"Asian Conference on Computer Vision. Springer, 136--153","author":"Chan Fu-Hsiang","year":"2016","unstructured":"Fu-Hsiang Chan, Yu-Ting Chen, Yu Xiang, and Min Sun. 2016. Anticipating accidents in dashcam videos. In Asian Conference on Computer Vision. Springer, 136--153."},{"key":"e_1_3_2_2_7_1","volume-title":"Computer Vision -- ACCV","author":"Chan Fu-Hsiang","year":"2016","unstructured":"Fu-Hsiang Chan, Yu-Ting Chen, Yu Xiang, and Min Sun. 2017. Anticipating Accidents in Dashcam Videos. In Computer Vision -- ACCV 2016. Springer International Publishing, Cham, 136--153."},{"key":"e_1_3_2_2_8_1","first-page":"17864","article-title":"Per-pixel classification is not all you need for semantic segmentation","volume":"34","author":"Cheng Bowen","year":"2021","unstructured":"Bowen Cheng, Alex Schwing, and Alexander Kirillov. 2021. Per-pixel classification is not all you need for semantic segmentation. 
Advances in Neural Information Processing Systems 34 (2021), 17864--17875.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_9_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_10_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"e_1_3_2_2_12_1","volume-title":"Scene Segmentation With Dual Relation-Aware Attention Network","author":"Fu Jun","year":"2020","unstructured":"Jun Fu, Jing Liu, Jie Jiang, Yong Li, Yongjun Bao, and Hanqing Lu. 2020. Scene Segmentation With Dual Relation-Aware Attention Network. IEEE Transactions on Neural Networks and Learning Systems (2020)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00326"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-022-00607-z"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2024.3398357"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548171"},{"key":"e_1_3_2_2_17_1","volume-title":"Lin (Eds.)","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. 
Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 6840--6851. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01712"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2019.105392"},{"key":"e_1_3_2_2_20_1","volume-title":"A hybrid modelling framework of machine learning and extreme value theory for crash risk estimation using traffic conflicts. Analytic methods in accident research 36","author":"Hussain Fizza","year":"2022","unstructured":"Fizza Hussain, Yuefeng Li, Ashutosh Arun, and Md Mazharul Haque. 2022. A hybrid modelling framework of machine learning and extreme value theory for crash risk estimation using traffic conflicts. Analytic methods in accident research 36 (2022), 100248."},{"key":"e_1_3_2_2_21_1","volume-title":"Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al.","author":"Jiang Albert Q","year":"2023","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3155613"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3155613"},{"key":"e_1_3_2_2_25_1","volume-title":"An Attention-guided Multistream Feature Fusion Network for Early Localization of Risky Traffic Agents in Driving Videos","author":"Karim Muhammad Monjurul","year":"2023","unstructured":"Muhammad Monjurul Karim, Zhaozheng Yin, and Ruwen Qin. 2023. 
An Attention-guided Multistream Feature Fusion Network for Early Localization of Risky Traffic Agents in Driving Videos. IEEE Transactions on Intelligent Vehicles (2023)."},{"key":"e_1_3_2_2_26_1","volume-title":"Uniformerv2: Spatiotemporal learning by arming image vits with video uniformer. arXiv preprint arXiv:2211.09552","author":"Li Kunchang","year":"2022","unstructured":"Kunchang Li, Yali Wang, Yinan He, Yizhuo Li, Yi Wang, Limin Wang, and Yu Qiao. 2022. Uniformerv2: Spatiotemporal learning by arming image vits with video uniformer. arXiv preprint arXiv:2211.09552 (2022)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.23919\/CHAIN.2024.100003"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2023.107019"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2024.107760"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2024.3376074"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i9.28900"},{"key":"e_1_3_2_2_32_1","volume-title":"2024 IEEE International Conference on Robotics and Automation (ICRA). IEEE, 14212--14219","author":"Liao Haicheng","year":"2024","unstructured":"Haicheng Liao, Shangqian Liu, Yongkang Li, Zhenning Li, Chengyue Wang, Yunjian Li, Shengbo Eben Li, and Chengzhong Xu. 2024. Human observation-inspired trajectory prediction for autonomous driving in mixed-autonomy traffic environments. In 2024 IEEE International Conference on Robotics and Automation (ICRA). IEEE, 14212--14219."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.commtr.2023.100116"},{"key":"e_1_3_2_2_34_1","volume-title":"CRASH: Crash Recognition and Anticipation System Harnessing with Context-Aware and Temporal Focus Attentions. 
arXiv:2407.17757 [cs.CV] https:\/\/arxiv.org\/abs\/2407.17757","author":"Liao Haicheng","year":"2024","unstructured":"Haicheng Liao, Haoyu Sun, Huanming Shen, Chengyue Wang, Kahou Tam, Chunlin Tian, Li Li, Chengzhong Xu, and Zhenning Li. 2024. CRASH: Crash Recognition and Anticipation System Harnessing with Context-Aware and Temporal Focus Attentions. arXiv:2407.17757 [cs.CV] https:\/\/arxiv.org\/abs\/2407.17757"},{"key":"e_1_3_2_2_35_1","unstructured":"Haotian Liu Chunyuan Li Yuheng Li Bo Li Yuanhan Zhang Sheng Shen and Yong Jae Lee. 2024. LLaVA-NeXT: Improved reasoning OCR and world knowledge. https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next\/"},{"key":"e_1_3_2_2_36_1","unstructured":"Haotian Liu Chunyuan Li Qingyang Wu and Yong Jae Lee. 2023. Visual Instruction Tuning."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3416298"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2023.03.075"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548165"},{"key":"e_1_3_2_2_41_1","unstructured":"Jiageng Mao Yuxi Qian Junjie Ye Hang Zhao and Yue Wang. 2023. GPT-Driver: Learning to Drive with GPT. arXiv:2310.01415 [cs.CV]"},{"key":"e_1_3_2_2_42_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. 
PMLR, 8748--8763."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2021.106090"},{"key":"e_1_3_2_2_44_1","volume-title":"Garnett (Eds.)","volume":"30","author":"Sabour Sara","year":"2017","unstructured":"Sara Sabour, Nicholas Frosst, and Geoffrey E Hinton. 2017. Dynamic Routing Between Capsules. In Advances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/2cad8fa47bbef282badbb8de5374b894-Paper.pdf"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"crossref","unstructured":"Hao Shao Yuxuan Hu Letian Wang Steven L. Waslander Yu Liu and Hongsheng Li. 2023. LMDrive: Closed-Loop End-to-End Driving with Large Language Models. arXiv:2312.07488 [cs.CV]","DOI":"10.1109\/CVPR52733.2024.01432"},{"key":"e_1_3_2_2_47_1","volume-title":"Anticipating Traffic Accidents with Adaptive Loss and Large-Scale Incident DB. In 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 3521--3529","author":"Suzuki T.","year":"2018","unstructured":"T. Suzuki, H. Kataoka, Y. Aoki, and Y. Satoh. 2018. Anticipating Traffic Accidents with Adaptive Loss and Large-Scale Incident DB. In 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 3521--3529. https:\/\/doi.org\/10.1109\/CVPR.2018.00371"},{"key":"e_1_3_2_2_48_1","volume-title":"Anticipating Traffic Accidents with Adaptive Loss and Large-Scale Incident DB. 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Suzuki Tomoyuki","year":"2018","unstructured":"Tomoyuki Suzuki, Hirokatsu Kataoka, Yoshimitsu Aoki, and Yutaka Satoh. 2018. Anticipating Traffic Accidents with Adaptive Loss and Large-Scale Incident DB. 
2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2018), 3521--3529. https:\/\/api.semanticscholar.org\/CorpusID:4713643"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109567"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00736"},{"key":"e_1_3_2_2_51_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_52_1","volume-title":"Chatgpt as your vehicle co-pilot: An initial attempt","author":"Wang Shiyi","year":"2023","unstructured":"Shiyi Wang, Yuxuan Zhu, Zhiheng Li, Yutong Wang, Li Li, and Zhengbing He. 2023. Chatgpt as your vehicle co-pilot: An initial attempt. IEEE Transactions on Intelligent Vehicles (2023)."},{"key":"e_1_3_2_2_53_1","volume-title":"GSC: A Graph and Spatio-temporal Continuity Based Framework for Accident Anticipation","author":"Wang Tianhang","year":"2023","unstructured":"Tianhang Wang, Kai Chen, Guang Chen, Bin Li, Zhijun Li, Zhengfa Liu, and Changjun Jiang. 2023. GSC: A Graph and Spatio-temporal Continuity Based Framework for Accident Anticipation. IEEE Transactions on Intelligent Vehicles (2023)."},{"key":"e_1_3_2_2_54_1","unstructured":"Wenhai Wang Jiangwei Xie ChuanYang Hu Haoming Zou Jianan Fan Wenwen Tong Yang Wen Silei Wu Hanming Deng Zhiqi Li et al. 2023. DriveMLM: Aligning Multi-Modal Large Language Models with Behavioral Planning States for Autonomous Driving. 
arXiv preprint arXiv:2312.09245 (2023)."},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2807402"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967556"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350899"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.146"},{"key":"e_1_3_2_2_59_1","volume-title":"Real-time crash potential prediction on freeways using connected vehicle data. Analytic methods in accident research 36","author":"Zhang Shile","year":"2022","unstructured":"Shile Zhang and Mohamed Abdel-Aty. 2022. Real-time crash potential prediction on freeways using connected vehicle data. Analytic methods in accident research 36 (2022), 100239."},{"key":"e_1_3_2_2_60_1","volume-title":"Trafficgpt: Viewing, processing and interacting with traffic foundation models. Transport Policy","author":"Zhang Siyao","year":"2024","unstructured":"Siyao Zhang, Daocheng Fu, Wenzhe Liang, Zhao Zhang, Bin Yu, Pinlong Cai, and Baozhen Yao. 2024. Trafficgpt: Viewing, processing and interacting with traffic foundation models. Transport Policy (2024)."},{"key":"e_1_3_2_2_61_1","volume-title":"T-gcn: A temporal graph convolutional network for traffic prediction","author":"Zhao Ling","year":"2019","unstructured":"Ling Zhao, Yujiao Song, Chao Zhang, Yu Liu, Pu Wang, Tao Lin, Min Deng, and Haifeng Li. 2019. T-gcn: A temporal graph convolutional network for traffic prediction. 
IEEE transactions on intelligent transportation systems 21, 9 (2019), 3848--3858."},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123451"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681326","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681326","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:43Z","timestamp":1750295863000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681326"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":62,"alternative-id":["10.1145\/3664647.3681326","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681326","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}