{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:09:20Z","timestamp":1765339760288,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","funder":[{"name":"U.S. DARPA ECOLE","award":["HR00112390063"],"award-info":[{"award-number":["HR00112390063"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754479","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:15Z","timestamp":1761375255000},"page":"13528-13530","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["CReLeRI: Explainable, Concept-centric, Representation, Learning, Reasoning, and Interaction Video Analysis System"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2453-0933","authenticated-orcid":false,"given":"Michael Francis","family":"Perez","sequence":"first","affiliation":[{"name":"UF, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7034-6285","authenticated-orcid":false,"given":"Yichi","family":"Yang","sequence":"additional","affiliation":[{"name":"UCSD, San Diego, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3489-8103","authenticated-orcid":false,"given":"Yuheng","family":"Zha","sequence":"additional","affiliation":[{"name":"UCSD, San Diego, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8094-8997","authenticated-orcid":false,"given":"Enze","family":"Ma","sequence":"additional","affiliation":[{"name":"UCSD, San Diego, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5850-1133","authenticated-orcid":false,"given":"Danish","family":"Tamboli","sequence":"additional","affiliation":[{"name":"UF, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4187-9958","authenticated-orcid":false,"given":"Haodi","family":"Ma","sequence":"additional","affiliation":[{"name":"UF, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0196-2199","authenticated-orcid":false,"given":"Reza","family":"Shahriari","sequence":"additional","affiliation":[{"name":"UF, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5018-8203","authenticated-orcid":false,"given":"Vyom","family":"Pathak","sequence":"additional","affiliation":[{"name":"UF, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6156-4964","authenticated-orcid":false,"given":"Dzmitry","family":"Kasinets","sequence":"additional","affiliation":[{"name":"UF, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8484-3915","authenticated-orcid":false,"given":"Rohith","family":"Venkatakrishnan","sequence":"additional","affiliation":[{"name":"UF, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8234-5482","authenticated-orcid":false,"given":"Daisy (Zhe)","family":"Wang","sequence":"additional","affiliation":[{"name":"UF, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9139-6172","authenticated-orcid":false,"given":"Jaime","family":"Ruiz","sequence":"additional","affiliation":[{"name":"UF, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7192-3457","authenticated-orcid":false,"given":"Eric D.","family":"Ragan","sequence":"additional","affiliation":[{"name":"UF, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6239-5031","authenticated-orcid":false,"given":"Zhiting","family":"Hu","sequence":"additional","affiliation":[{"name":"UCSD, San Diego, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3683-4280","authenticated-orcid":false,"given":"Eric","family":"Xing","sequence":"additional","affiliation":[{"name":"CMU, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8504-3410","authenticated-orcid":false,"given":"Jun-Yan","family":"Zhu","sequence":"additional","affiliation":[{"name":"CMU, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Meta AI. 2024. The Llama 3 Herd of Models. arXiv:2407.21783 [cs.AI] https:\/\/arxiv.org\/abs\/2407.21783"},{"key":"e_1_3_2_2_2_1","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"Alayrac Jean-Baptiste","year":"2022","unstructured":"Jean-Baptiste Alayrac, Jeff Donahue, Pauline Luc, Antoine Miech, Iain Barr, Yana Hasson, Karel Lenc, Arthur Mensch, Katie Millicah, Malcolm Reynolds, Roman Ring, Eliza Rutherford, Serkan Cabi, Tengda Han, Zhitao Gong, Sina Samangooei, Marianne Monteiro, Jacob Menick, Sebastian Borgeaud, Andrew Brock, Aida Nematzadeh, Sahand Sharifzadeh, Mikolaj Binkowski, Ricardo Barreira, Oriol Vinyals, Andrew Zisserman, and Karen Simonyan. 2022. Flamingo: a visual language model for few-shot learning. In Proceedings of the 36th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS '22). Curran Associates Inc., Red Hook, NY, USA, Article 1723, 21 pages."},{"key":"e_1_3_2_2_3_1","volume-title":"Depth Pro: Sharp Monocular Metric Depth in Less Than a Second. arXiv:2410.02073 [cs.CV] https:\/\/arxiv.org\/abs\/2410.02073","author":"Bochkovskii Aleksei","year":"2024","unstructured":"Aleksei Bochkovskii, Ama\u00ebl Delaunoy, Hugo Germain, Marcel Santos, Yichao Zhou, Stephan R. Richter, and Vladlen Koltun. 2024. Depth Pro: Sharp Monocular Metric Depth in Less Than a Second. arXiv:2410.02073 [cs.CV] https:\/\/arxiv.org\/abs\/2410.02073"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00018"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3240881"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.5555\/3001460.3001507"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.3390\/app13116582"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3703155"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2023.3296451"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.342"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01759"},{"key":"e_1_3_2_2_13_1","volume-title":"Introducing GPT-4V: Multimodal Capabilities in GPT-4. OpenAI Blog. https:\/\/openai.com\/research\/gpt-4v Accessed","author":"AI.","year":"2025","unstructured":"OpenAI. 2023. Introducing GPT-4V: Multimodal Capabilities in GPT-4. OpenAI Blog. https:\/\/openai.com\/research\/gpt-4v Accessed: March 13, 2025."},{"key":"e_1_3_2_2_14_1","unstructured":"Yansong Peng Hebei Li Peixi Wu Yueyi Zhang Xiaoyan Sun and Feng Wu. 2024. D-FINE: Redefine Regression Task in DETRs as Fine-grained Distribution Refinement. arXiv:2410.13842 [cs.CV] https:\/\/arxiv.org\/abs\/2410.13842"},{"key":"e_1_3_2_2_15_1","unstructured":"Qwen: An Yang Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chengyuan Li Dayiheng Liu Fei Huang Haoran Wei Huan Lin Jian Yang Jianhong Tu Jianwei Zhang Jianxin Yang Jiaxi Yang Jingren Zhou Junyang Lin Kai Dang Keming Lu Keqin Bao Kexin Yang Le Yu Mei Li Mingfeng Xue Pei Zhang Qin Zhu Rui Men Runji Lin Tianhao Li Tianyi Tang Tingyu Xia Xingzhang Ren Xuancheng Ren Yang Fan Yang Su Yichang Zhang Yu Wan Yuqiong Liu Zeyu Cui Zhenru Zhang and Zihan Qiu. 2025. Qwen2.5 Technical Report. arXiv:2412.15115 [cs.CL] https:\/\/arxiv.org\/abs\/2412.15115"},{"key":"e_1_3_2_2_16_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops. 8176-8181","author":"Saha Avinab","year":"2024","unstructured":"Avinab Saha, Shashank Gupta, Sravan Kumar Ankireddy, Karl Chahine, and Joydeep Ghosh. 2024. Exploring Explainability in Video Action Recognition. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops. 8176-8181."},{"key":"e_1_3_2_2_17_1","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Song Kaitao","year":"2020","unstructured":"Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, and Tie-Yan Liu. 2020. MPNet: masked and permuted pre-training for language understanding. In Proceedings of the 34th International Conference on Neural Information Processing Systems (Vancouver, BC, Canada) (NIPS '20). Curran Associates Inc., Red Hook, NY, USA, Article 1414, 11 pages."},{"volume-title":"Hours of video uploaded to YouTube every minute as of","year":"2022","key":"e_1_3_2_2_18_1","unstructured":"YouTube, and Google. 2022. Hours of video uploaded to YouTube every minute as of February 2022. https:\/\/www.statista.com\/statistics\/259477\/hours-of-video-uploaded-to-youtube-every-minute\/ [Online; accessed 29-May-2025]."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754479","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:04:48Z","timestamp":1765339488000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754479"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":18,"alternative-id":["10.1145\/3746027.3754479","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754479","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}