{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T17:24:35Z","timestamp":1764782675320,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,7]]},"DOI":"10.1145\/3767695.3769516","type":"proceedings-article","created":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T17:14:58Z","timestamp":1764782098000},"page":"231-239","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["TVR-Ranking: A Dataset for Ranked Video Moment Retrieval with Imprecise Queries"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-2415-3979","authenticated-orcid":false,"given":"Renjie","family":"Liang","sequence":"first","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore and University of Florida, Gainesville, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1378-322X","authenticated-orcid":false,"given":"Chongzhi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2007-2706","authenticated-orcid":false,"given":"Li","family":"Li","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore and University of Southern California, Los Angeles, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4832-5969","authenticated-orcid":false,"given":"Jing","family":"Wang","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5262-9713","authenticated-orcid":false,"given":"Xizhou","family":"Zhu","sequence":"additional","affiliation":[{"name":"SenseTime Research, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0764-4258","authenticated-orcid":false,"given":"Aixin","family":"Sun","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"54","article-title":"FLAIR: An easy-to-use framework for state-of-the-art NLP","author":"Akbik Alan","year":"2019","unstructured":"Alan Akbik, Tanja Bergmann, Duncan Blythe, Kashif Rasul, Stefan Schweter, and Roland Vollgraf. 2019. FLAIR: An easy-to-use framework for state-of-the-art NLP. In NAACL. 54-59.","journal-title":"NAACL."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Lisa Anne Hendricks Oliver Wang Eli Shechtman Josef Sivic Trevor Darrell and Bryan Russell. 2017. Localizing Moments in Video With Natural Language. In ICCV.","DOI":"10.1109\/ICCV.2017.618"},{"key":"e_1_3_2_1_3_1","first-page":"5267","article-title":"Tall: Temporal activity localization via language query","author":"Gao Jiyang","year":"2017","unstructured":"Jiyang Gao, Chen Sun, Zhenheng Yang, and Ram Nevatia. 2017. Tall: Temporal activity localization via language query. In ICCV. 5267-5275.","journal-title":"ICCV."},{"key":"e_1_3_2_1_4_1","unstructured":"Tianyu Gao Xingcheng Yao and Danqi Chen. 2021. SimCSE: Simple Contrastive Learning of Sentence Embeddings. In EMNLP."},{"key":"e_1_3_2_1_5_1","unstructured":"Kristen Grauman Andrew Westbury and Others. 2022. Ego4D: Around the World in 3 000 Hours of Egocentric Video. In CVPR. 18995-19012."},{"key":"e_1_3_2_1_6_1","volume-title":"A Dataset for Medical Instructional Video Classification and Question Answering. CoRR","author":"Gupta Deepak","year":"2022","unstructured":"Deepak Gupta, Kush Attal, and Dina Demner-Fushman. 2022. A Dataset for Medical Instructional Video Classification and Question Answering. CoRR, Vol. abs\/2201.12888 (2022). arXiv:2201.12888"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-023-02036-y"},{"key":"e_1_3_2_1_8_1","first-page":"3900","article-title":"CONQUER: Contextual query-aware ranking for video corpus moment retrieval","author":"Hou Zhijian","year":"2021","unstructured":"Zhijian Hou, Chong-Wah Ngo, and Wing Kwong Chan. 2021. CONQUER: Contextual query-aware ranking for video corpus moment retrieval. In MM. 3900-3908.","journal-title":"MM."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/582415.582418"},{"key":"e_1_3_2_1_10_1","first-page":"18661","article-title":"Supervised contrastive learning","volume":"33","author":"Khosla Prannay","year":"2020","unstructured":"Prannay Khosla, Piotr Teterwak, Chen Wang, Aaron Sarna, Yonglong Tian, Phillip Isola, Aaron Maschinot, Ce Liu, and Dilip Krishnan. 2020. Supervised contrastive learning. In NeurIPS, Vol. 33. 18661-18673.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_11_1","first-page":"706","article-title":"Dense-captioning events in videos","author":"Krishna Ranjay","year":"2017","unstructured":"Ranjay Krishna, Kenji Hata, Frederic Ren, Li Fei-Fei, and Juan Carlos Niebles. 2017. Dense-captioning events in videos. In ICCV. 706-715.","journal-title":"ICCV."},{"key":"e_1_3_2_1_12_1","first-page":"11846","article-title":"Detecting moments and highlights in videos via natural language queries","volume":"34","author":"Lei Jie","year":"2021","unstructured":"Jie Lei, Tamara L Berg, and Mohit Bansal. 2021. Detecting moments and highlights in videos via natural language queries. NeurIPS, Vol. 34 (2021), 11846-11858.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_13_1","volume-title":"TVR: A Large-Scale Dataset for Video-Subtitle Moment Retrieval. In ECCV.","author":"Lei Jie","year":"2020","unstructured":"Jie Lei, Licheng Yu, Tamara L Berg, and Mohit Bansal. 2020. TVR: A Large-Scale Dataset for Video-Subtitle Moment Retrieval. In ECCV."},{"key":"e_1_3_2_1_14_1","volume-title":"HERO: Hierarchical Encoder for Video Language Omni-representation Pre-training. In EMNLP.","author":"Li Linjie","year":"2020","unstructured":"Linjie Li, Yen-Chun Chen, Yu Cheng, Zhe Gan, Licheng Yu, and Jingjing Liu. 2020. HERO: Hierarchical Encoder for Video Language Omni-representation Pre-training. In EMNLP."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3556537"},{"key":"e_1_3_2_1_16_1","volume-title":"Introduction to Information Retrieval","author":"Manning Christopher D.","unstructured":"Christopher D. Manning, Prabhakar Raghavan, and Hinrich Sch\u00fctze. 2008. Introduction to Information Retrieval. Cambridge University Press. http:\/\/nlp.stanford.edu\/IR-book\/information-retrieval-book.html"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00207"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00075"},{"key":"e_1_3_2_1_19_1","first-page":"2137","article-title":"What actions are needed for understanding human actions in videos?","author":"Sigurdsson Gunnar A","year":"2017","unstructured":"Gunnar A Sigurdsson, Olga Russakovsky, and Abhinav Gupta. 2017. What actions are needed for understanding human actions in videos?. In ICCV. 2137-2146.","journal-title":"ICCV."},{"key":"e_1_3_2_1_20_1","volume-title":"Hollywood in homes: Crowdsourcing data collection for activity understanding","author":"Sigurdsson Gunnar A","unstructured":"Gunnar A Sigurdsson, G\u00fcl Varol, Xiaolong Wang, Ali Farhadi, Ivan Laptev, and Abhinav Gupta. 2016. Hollywood in homes: Crowdsourcing data collection for activity understanding. In ECCV. Springer, 510-526."},{"key":"e_1_3_2_1_21_1","first-page":"5026","article-title":"Mad: A scalable dataset for language grounding in videos from movie audio descriptions","author":"Soldan Mattia","year":"2022","unstructured":"Mattia Soldan, Alejandro Pardo, Juan Le\u00f3n Alc\u00e1zar, Fabian Caba, Chen Zhao, Silvio Giancola, and Bernard Ghanem. 2022. Mad: A scalable dataset for language grounding in videos from movie audio descriptions. In CVPR. 5026-5035.","journal-title":"CVPR."},{"key":"e_1_3_2_1_22_1","volume-title":"Temporal localization of moments in video collections with natural language. arXiv preprint arXiv:1907.12763","author":"Victor Escorcia","year":"2019","unstructured":"Escorcia Victor, Soldan Mattia, Sivic Josef, Ghanem Bernard, and Russell Bryan. 2019. Temporal localization of moments in video collections with natural language. arXiv preprint arXiv:1907.12763 (2019)."},{"key":"e_1_3_2_1_23_1","volume-title":"European Conference on Computer Vision. Springer, 185-200","author":"Yoon Sunjae","year":"2022","unstructured":"Sunjae Yoon, Ji Woo Hong, Eunseop Yoon, Dahyun Kim, Junyeong Kim, Hee Suk Yoon, and Chang D Yoo. 2022. Selective query-guided debiasing for video corpus moment retrieval. In European Conference on Computer Vision. Springer, 185-200."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1002\/asi.24825"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Tongtong Yuan Xuange Zhang Kun Liu Bo Liu Chen Chen Jian Jin and Zhenzhen Jiao. 2023. Towards Surveillance Video-and-Language Understanding: New Dataset Baselines and Challenges. arXiv:2309.13925 [cs.CV]","DOI":"10.1109\/CVPR52733.2024.02082"},{"key":"e_1_3_2_1_26_1","volume-title":"A Flexible and Scalable Framework for Video Moment Search. arXiv preprint arXiv:2501.05072","author":"Zhang Chongzhi","year":"2025","unstructured":"Chongzhi Zhang, Xizhou Zhu, and Aixin Sun. 2025. A Flexible and Scalable Framework for Video Moment Search. arXiv preprint arXiv:2501.05072 (2025)."},{"key":"e_1_3_2_1_27_1","volume-title":"Joey Tianyi Zhou, and Rick Siow Mong Goh","author":"Zhang Hao","year":"2021","unstructured":"Hao Zhang, Aixin Sun, Wei Jing, Guoshun Nan, Liangli Zhen, Joey Tianyi Zhou, and Rick Siow Mong Goh. 2021. Video Corpus Moment Retrieval with Contrastive Learning. In SIGIR."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3258628"}],"event":{"name":"SIGIR-AP 2025:Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region","location":"Xi'an China","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 2025 Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region"],"original-title":[],"deposited":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T17:19:44Z","timestamp":1764782384000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3767695.3769516"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":28,"alternative-id":["10.1145\/3767695.3769516","10.1145\/3767695"],"URL":"https:\/\/doi.org\/10.1145\/3767695.3769516","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"2025-12-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}