{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,11]],"date-time":"2026-07-11T02:28:25Z","timestamp":1783736905491,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T00:00:00Z","timestamp":1717027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFB3103700, 2022YFB3103704"],"award-info":[{"award-number":["2022YFB3103700, 2022YFB3103704"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. 62276248 and U21B2046"],"award-info":[{"award-number":["No. 62276248 and U21B2046"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","doi-asserted-by":"publisher","award":["No. 2023111"],"award-info":[{"award-number":["No. 2023111"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3652583.3658088","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T06:30:40Z","timestamp":1717741840000},"page":"394-403","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Improving Video Corpus Moment Retrieval with Partial Relevance Enhancement"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-6949-2703","authenticated-orcid":false,"given":"Danyang","family":"Hou","sequence":"first","affiliation":[{"name":"CAS Key Laboratory of AI Safety, Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1161-8546","authenticated-orcid":false,"given":"Liang","family":"Pang","sequence":"additional","affiliation":[{"name":"CAS Key Laboratory of AI Safety, Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1081-8119","authenticated-orcid":false,"given":"Huawei","family":"Shen","sequence":"additional","affiliation":[{"name":"CAS Key Laboratory of AI Safety, Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5201-8195","authenticated-orcid":false,"given":"Xueqi","family":"Cheng","sequence":"additional","affiliation":[{"name":"CAS Key Laboratory of AI Safety, Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.618"},{"key":"e_1_3_2_1_2_1","volume-title":"3rd International Conference on Learning Representations.","author":"Bahdanau Dzmitry","year":"2015","unstructured":"Dzmitry Bahdanau, Kyunghyun Cho, and Yoshua Bengio. 2015. Neural Machine Translation by Jointly Learning to Align and Translate. In 3rd International Conference on Learning Representations."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.773"},{"key":"e_1_3_2_1_4_1","volume-title":"End-to-end Object Detection with Transformers. In European Conference on Computer Vision. Springer, 213--229","author":"Carion Nicolas","year":"2020","unstructured":"Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. 2020. End-to-end Object Detection with Transformers. In European Conference on Computer Vision. Springer, 213--229."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1015"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018175"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6627"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018199"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01065"},{"key":"e_1_3_2_1_10_1","volume-title":"Cross-Modality Knowledge Calibration Network for Video Corpus Moment Retrieval","author":"Chen Tongbao","year":"2023","unstructured":"Tongbao Chen, Wenmin Wang, Zhe Jiang, Ruochen Li, and Bingshu Wang. 2023. Cross-Modality Knowledge Calibration Network for Video Corpus Moment Retrieval. IEEE Transactions on Multimedia (2023)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1078"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3059295"},{"key":"e_1_3_2_1_13_1","volume-title":"Russell","author":"Escorcia Victor","year":"2019","unstructured":"Victor Escorcia, Mattia Soldan, Josef Sivic, Bernard Ghanem, and Bryan C. Russell. 2019. Temporal Localization of Moments in Video Collections with Natural Language. (2019). arXiv:1907.12763"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"e_1_3_2_1_15_1","volume-title":"Multimodal Transformer for Video Retrieval. In European Conference on Computer Vision. Springer, 214--229","author":"Gabeur Valentin","year":"2020","unstructured":"Valentin Gabeur, Chen Sun, Karteek Alahari, and Cordelia Schmid. 2020. Multimodal Transformer for Video Retrieval. In European Conference on Computer Vision. Springer, 214--229."},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 1984--1990","author":"Ghosh Soham","year":"2019","unstructured":"Soham Ghosh, Anuva Agarwal, Zarana Parekh, and Alexander G Hauptmann. 2019. ExCL: Extractive Clip Localization Using Natural Language Descriptions. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 1984--1990."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475241"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475281"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462974"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_22_1","first-page":"11846","article-title":"Detecting Moments and Highlights in Videos via Natural Language Queries","volume":"34","author":"Lei Jie","year":"2021","unstructured":"Jie Lei, Tamara L Berg, and Mohit Bansal. 2021. Detecting Moments and Highlights in Videos via Natural Language Queries. Advances in Neural Information Processing Systems 34 (2021), 11846--11858.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58589-1_27"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16285"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.161"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01108"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240549"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00305"},{"key":"e_1_3_2_1_29_1","volume-title":"Roberta: A Robustly Optimized BERT Pretraining Approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A Robustly Optimized BERT Pretraining Approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00272"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02205"},{"key":"e_1_3_2_1_32_1","volume-title":"Support-set Bottlenecks for Video-text Representation Learning. In International Conference on Learning Representations.","author":"Patrick Mandela","year":"2020","unstructured":"Mandela Patrick, Po-Yao Huang, Yuki Asano, Florian Metze, Alexander G Hauptmann, Joao F Henriques, and Andrea Vedaldi. 2020. Support-set Bottlenecks for Video-text Representation Learning. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3090595"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00208"},{"key":"e_1_3_2_1_35_1","volume-title":"The new data and new challenges in multimedia research. arXiv preprint arXiv:1503.01817 1, 8","author":"Thomee Bart","year":"2015","unstructured":"Bart Thomee, David A Shamma, Gerald Friedland, Benjamin Elizalde, Karl Ni, Douglas Poland, Damian Borth, and Li-Jia Li. 2015. The new data and new challenges in multimedia research. arXiv preprint arXiv:1503.01817 1, 8 (2015)."},{"key":"e_1_3_2_1_36_1","volume-title":"Contrastive Multiview Coding. In European Conference on Computer Vision. Springer, 776--794","author":"Tian Yonglong","year":"2020","unstructured":"Yonglong Tian, Dilip Krishnan, and Phillip Isola. 2020. Contrastive Multiview Coding. In European Conference on Computer Vision. Springer, 776--794."},{"key":"e_1_3_2_1_37_1","volume-title":"Representation Learning with Contrastive Predictive Coding. arXiv e-prints","author":"den Oord Aaron Van","year":"2018","unstructured":"Aaron Van den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation Learning with Contrastive Predictive Coding. arXiv e-prints (2018), arXiv-1807."},{"key":"e_1_3_2_1_38_1","volume-title":"Attention is All You Need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All You Need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1018"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531899"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00504"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475515"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.327"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019062"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401151"},{"key":"e_1_3_2_1_46_1","volume-title":"Selective Query-Guided Debiasing for Video Corpus Moment Retrieval. In European Conference on Computer Vision. Springer, 185--200","author":"Yoon Sunjae","year":"2022","unstructured":"Sunjae Yoon, Ji Woo Hong, Eunseop Yoon, Dahyun Kim, Junyeong Kim, Hee Suk Yoon, and Chang D Yoo. 2022. Selective Query-Guided Debiasing for Video Corpus Moment Retrieval. In European Conference on Computer Vision. Springer, 185--200."},{"key":"e_1_3_2_1_47_1","volume-title":"QANet: Combining Local Convolution with Global Self-Attention for Reading Comprehension. In International Conference on Learning Representations.","author":"Yu Adams Wei","year":"2018","unstructured":"Adams Wei Yu, David Dohan, Minh-Thang Luong, Rui Zhao, Kai Chen, Mohammad Norouzi, and Quoc V Le. 2018. QANet: Combining Local Convolution with Global Self-Attention for Reading Comprehension. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019159"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01030"},{"key":"e_1_3_2_1_50_1","volume-title":"A Hierarchical Multi-modal Encoder for Moment Localization in Video Corpus. arXiv preprint arXiv:2011.09046","author":"Zhang Bowen","year":"2020","unstructured":"Bowen Zhang, Hexiang Hu, Joonseok Lee, Ming Zhao, Sheide Chammas, Vihan Jain, Eugene Ie, and Fei Sha. 2020. A Hierarchical Multi-modal Encoder for Moment Localization in Video Corpus. arXiv preprint arXiv:2011.09046 (2020)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00134"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462874"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.585"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01248"},{"key":"e_1_3_2_1_55_1","volume-title":"Video Corpus Moment Retrieval via Deformable Multigranularity Feature Fusion and Adversarial Training","author":"Zhang Xuemei","year":"2023","unstructured":"Xuemei Zhang, Peng Zhao, Jinsheng Ji, Xiankai Lu, and Yilong Yin. 2023. Video Corpus Moment Retrieval via Deformable Multigranularity Feature Fusion and Adversarial Training. IEEE Transactions on Circuits and Systems for Video Technology (2023)."}],"event":{"name":"ICMR '24: International Conference on Multimedia Retrieval","location":"Phuket Thailand","acronym":"ICMR '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia","SIGSOFT ACM Special Interest Group on Software Engineering"]},"container-title":["Proceedings of the 2024 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658088","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652583.3658088","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T08:53:57Z","timestamp":1755766437000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658088"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":55,"alternative-id":["10.1145\/3652583.3658088","10.1145\/3652583"],"URL":"https:\/\/doi.org\/10.1145\/3652583.3658088","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}