{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:57:31Z","timestamp":1781539051840,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":19,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810743","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1542-1546","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["TD-CoT: Bridging the Holistic-Atomic Gap for Training-Free Temporal Reversal Detection in Video LVLMs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-9349-0642","authenticated-orcid":false,"given":"Pengfei","family":"Huang","sequence":"first","affiliation":[{"name":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3193-8531","authenticated-orcid":false,"given":"Xuezhen","family":"Hou","sequence":"additional","affiliation":[{"name":"School of Mathematics and Physics, University of Science and Technology Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2412.05271"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02245"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.5555\/3305381.3305518"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Wonkyun Kim Changin Choi Wonseok Lee and Wonjong Rhee. 2024. An Image Grid Can Be Worth a Video: Zero-shot Video Question Answering Using a VLM. IEEE Access 12 (2024) 193057\u2013193075.","DOI":"10.1109\/ACCESS.2024.3517625"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2602.00288"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02095"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.342"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2403.00476"},{"key":"e_1_3_3_1_10_2","unstructured":"Ruipu Luo Ziwang Zhao Min Yang Zheming Yang Minghui Qiu Zhongyu Wei Yanhao Wang and Cen Chen. 2023. Valley: Video Assistant with Large Language Model Enhanced Ability. ACM Transactions on Multimedia Computing Communications and Applications (2023)."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.679"},{"key":"e_1_3_3_1_12_2","unstructured":"Qwen Team. 2025. Qwen2.5-VL: A Scalable Vision-Language Model. https:\/\/qwenlm.github.io\/blog\/qwen2.5-vl\/ Technical blog post."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2511.10979"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00311"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc Le and Denny Zhou. 2022. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. Advances in Neural Information Processing Systems 35 (2022) 24824\u201324837.","DOI":"10.52202\/068431-1800"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"e_1_3_3_1_17_2","unstructured":"An Yang Baosong Yang Binyuan Hui Bo Zheng Bowen Yu Chang Zhou Chengpeng Li Chengyuan Li Dayiheng Liu Fei Huang et\u00a0al. 2024. Qwen2.5 Technical Report. arxiv:https:\/\/arXiv.org\/abs\/2412.15115\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2412.15115"},{"key":"e_1_3_3_1_18_2","first-page":"76749","volume-title":"Advances in Neural Information Processing Systems","author":"Yu Shoubin","year":"2023","unstructured":"Shoubin Yu, Jaemin Cho, Prateek Yadav, and Mohit Bansal. 2023. Self-Chained Image-Language Model for Video Localization and Question Answering. In Advances in Neural Information Processing Systems , Vol.\u00a036. 76749\u201376771. (SeViLA)."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-naacl.51"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2205.10625"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:39:06Z","timestamp":1781537946000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810743"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":19,"alternative-id":["10.1145\/3805622.3810743","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810743","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}