{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:04:13Z","timestamp":1750309453444,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1145\/3696409.3700199","type":"proceedings-article","created":{"date-parts":[[2024,12,28]],"date-time":"2024-12-28T09:55:23Z","timestamp":1735379723000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Dual-Stream Keyframe Enhancement for Video Question Answering"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1042-8361","authenticated-orcid":false,"given":"Zhenzhen","family":"Hu","sequence":"first","affiliation":[{"name":"Hefei University of Technology, Hefei, CN"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8033-1641","authenticated-orcid":false,"given":"Xin","family":"Guan","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, CN"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9446-249X","authenticated-orcid":false,"given":"Jia","family":"Li","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, CN"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1262-764X","authenticated-orcid":false,"given":"Zijie","family":"Song","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, CN"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5461-3986","authenticated-orcid":false,"given":"Richang","family":"Hong","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, CN"}]}],"member":"320","published-online":{"date-parts":[[2024,12,28]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00293"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00238"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00210"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01419"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01113"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/94"},{"key":"e_1_3_3_1_8_2","unstructured":"Eric Jang Shixiang Gu and Ben Poole. 2016. Categorical reparameterization with gumbel-softmax. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1611.01144 (2016)."},{"key":"e_1_3_3_1_9_2","unstructured":"Sungdong Kim Jin-Hwa Kim Jiyoung Lee and Minjoon Seo. 2023. Semi-parametric video-grounded text generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2301.11507 (2023)."},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00999"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018658"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01275"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612577"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19781-9_19"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Yun Liu Xiaoming Zhang Feiran Huang Bo Zhang and Zhoujun Li. 2022. Cross-attentional spatio-temporal semantic graph networks for video question answering. IEEE Transactions on Image Processing 31 (2022) 1684\u20131696.","DOI":"10.1109\/TIP.2022.3142526"},{"key":"e_1_3_3_1_16_2","unstructured":"Haoyu Lu Mingyu Ding Nanyi Fei Yuqi Huo and Zhiwu Lu. 2022. Lgdn: Language-guided denoising network for video-language modeling. Advances in Neural Information Processing Systems 35 (2022) 25198\u201325211."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-68790-8_27"},{"key":"e_1_3_3_1_18_2","first-page":"17656","volume-title":"International Conference on Machine Learning","author":"Petersen Felix","year":"2022","unstructured":"Felix Petersen, Hilde Kuehne, Christian Borgelt, and Oliver Deussen. 2022. Differentiable top-k classification learning. In International Conference on Machine Learning. PMLR, 17656\u201317668."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Tianwen Qian Ran Cui Jingjing Chen Pai Peng Xiaowei Guo and Yu-Gang Jiang. 2023. Locate before answering: Answer guided question localization for video question answering. IEEE Transactions on Multimedia (2023).","DOI":"10.1109\/TMM.2023.3323878"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548061"},{"key":"e_1_3_3_1_21_2","first-page":"8748","volume-title":"International Conference on Machine Learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning. 8748\u20138763."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Javier Selva Anders\u00a0S Johansen Sergio Escalera Kamal Nasrollahi Thomas\u00a0B Moeslund and Albert Clap\u00e9s. 2023. Video transformers: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence (2023).","DOI":"10.1109\/TPAMI.2023.3243465"},{"key":"e_1_3_3_1_23_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00638"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00965"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20184"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_3"},{"key":"e_1_3_3_1_28_2","unstructured":"Yujia Xie Hanjun Dai Minshuo Chen Bo Dai Tuo Zhao Hongyuan Zha Wei Wei and Tomas Pfister. 2020. Differentiable top-k with optimal transport. Advances in Neural Information Processing Systems 33 (2020) 20520\u201320531."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00171"},{"key":"e_1_3_3_1_30_2","unstructured":"Shoubin Yu Jaemin Cho Prateek Yadav and Mohit Bansal. 2023. Self-Chained Image-Language Model for Video Localization and Question Answering. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.06988 (2023)."},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.432"}],"event":{"name":"MMAsia '24: ACM Multimedia Asia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Auckland New Zealand","acronym":"MMAsia '24"},"container-title":["Proceedings of the 6th ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696409.3700199","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696409.3700199","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:10:11Z","timestamp":1750295411000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696409.3700199"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":30,"alternative-id":["10.1145\/3696409.3700199","10.1145\/3696409"],"URL":"https:\/\/doi.org\/10.1145\/3696409.3700199","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"2024-12-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}