{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T19:19:59Z","timestamp":1768072799833,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T00:00:00Z","timestamp":1665360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62022083, U21B2038 and 61931008"],"award-info":[{"award-number":["62022083, U21B2038 and 61931008"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2018AAA0102000"],"award-info":[{"award-number":["2018AAA0102000"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,10]]},"DOI":"10.1145\/3503161.3547785","type":"proceedings-article","created":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T15:43:01Z","timestamp":1665416581000},"page":"4789-4800","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Concept Propagation via Attentional Knowledge Graph Reasoning for Video-Text Retrieval"],"prefix":"10.1145","author":[{"given":"Sheng","family":"Fang","sequence":"first","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuhui","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences &amp; Peng Cheng Laboratory, Beijing; Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junbao","family":"Zhuo","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qingming","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences &amp; Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Ma","sequence":"additional","affiliation":[{"name":"Meituan Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoming","family":"Wei","sequence":"additional","affiliation":[{"name":"Meituan Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaolin","family":"Wei","sequence":"additional","affiliation":[{"name":"Meituan Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,10,10]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.5555\/2002472.2002497"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01065"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475534"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01138"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_7_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2019 . BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding . In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019 , Minneapolis, MN, USA, June 2--7 , 2019, Volume 1 (Long and Short Papers), Jill Burstein, Christy Doran, and Thamar Solorio (Eds.). Association for Computational Linguistics, 4171--4186. https:\/\/doi.org\/10.18653\/v1\/n19--1423 Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2--7, 2019, Volume 1 (Long and Short Papers), Jill Burstein, Christy Doran, and Thamar Solorio (Eds.). Association for Computational Linguistics, 4171--4186. https:\/\/doi.org\/10.18653\/v1\/n19--1423"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2832602"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00957"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3059295"},{"key":"e_1_3_2_2_11_1","volume-title":"Jamie Ryan Kiros, and Sanja Fidler","author":"Faghri Fartash","year":"2017","unstructured":"Fartash Faghri , David J Fleet , Jamie Ryan Kiros, and Sanja Fidler . 2017 . Vse : Improving visual-semantic embeddings with hard negatives. arXiv preprint arXiv:1707.05612 (2017). Fartash Faghri, David J Fleet, Jamie Ryan Kiros, and Sanja Fidler. 2017. Vse: Improving visual-semantic embeddings with hard negatives. arXiv preprint arXiv:1707.05612 (2017)."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654902"},{"key":"e_1_3_2_2_13_1","volume-title":"Exploiting Visual Semantic Reasoning for Video-Text Retrieval. arXiv preprint arXiv:2006.08889","author":"Feng Zerun","year":"2020","unstructured":"Zerun Feng , Zhimin Zeng , Caili Guo , and Zheng Li. 2020. Exploiting Visual Semantic Reasoning for Video-Text Retrieval. arXiv preprint arXiv:2006.08889 ( 2020 ). Zerun Feng, Zhimin Zeng, Caili Guo, and Zheng Li. 2020. Exploiting Visual Semantic Reasoning for Video-Text Retrieval. arXiv preprint arXiv:2006.08889 (2020)."},{"key":"e_1_3_2_2_14_1","volume-title":"Multi-modal Transformer for Video Retrieval. In European Conference on Computer Vision (ECCV)","volume":"5","author":"Gabeur Valentin","year":"2020","unstructured":"Valentin Gabeur , Chen Sun , Karteek Alahari , and Cordelia Schmid . 2020 . Multi-modal Transformer for Video Retrieval. In European Conference on Computer Vision (ECCV) , Vol. 5 . Springer. Valentin Gabeur, Chen Sun, Karteek Alahari, and Cordelia Schmid. 2020. Multi-modal Transformer for Video Retrieval. In European Conference on Computer Vision (ECCV), Vol. 5. Springer."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475241"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_17_1","volume-title":"Unifying visual-semantic embeddings with multimodal neural language models. arXiv preprint arXiv:1411.2539","author":"Kiros Ryan","year":"2014","unstructured":"Ryan Kiros , Ruslan Salakhutdinov , and Richard S Zemel . 2014. Unifying visual-semantic embeddings with multimodal neural language models. arXiv preprint arXiv:1411.2539 ( 2014 ). Ryan Kiros, Ruslan Salakhutdinov, and Richard S Zemel. 2014. Unifying visual-semantic embeddings with multimodal neural language models. arXiv preprint arXiv:1411.2539 (2014)."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"e_1_3_2_2_19_1","volume-title":"Minh-Triet Tran, Yuki Watanabe, Martin Klinkigt, et al.","author":"Le Duy-Dinh","year":"2016","unstructured":"Duy-Dinh Le , Sang Phan , Vinh-Tiep Nguyen , Benjamin Renoust , Tuan A Nguyen , Van-Nam Hoang , Thanh Duc Ngo , Minh-Triet Tran, Yuki Watanabe, Martin Klinkigt, et al. 2016 . NII-HITACHI-UIT at TRECVID 2016.. In TRECVID. Duy-Dinh Le, Sang Phan, Vinh-Tiep Nguyen, Benjamin Renoust, Tuan A Nguyen, Van-Nam Hoang, Thanh Duc Ngo, Minh-Triet Tran, Yuki Watanabe, Martin Klinkigt, et al. 2016. NII-HITACHI-UIT at TRECVID 2016.. In TRECVID."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00725"},{"key":"e_1_3_2_2_21_1","volume-title":"Proceedings of the 27th ACM International Conference on Multimedia. 1786--1794","author":"Li Xirong","year":"2019","unstructured":"Xirong Li , Chaoxi Xu , Gang Yang , Zhineng Chen , and Jianfeng Dong . 2019 . W2VV Fully Deep Learning for Ad-hoc Video Search . In Proceedings of the 27th ACM International Conference on Multimedia. 1786--1794 . Xirong Li, Chaoxi Xu, Gang Yang, Zhineng Chen, and Jianfeng Dong. 2019. W2VV Fully Deep Learning for Ad-hoc Video Search. In Proceedings of the 27th ACM International Conference on Multimedia. 1786--1794."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.502"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475621"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01170"},{"key":"e_1_3_2_2_25_1","volume-title":"30th British Machine Vision Conference 2019, BMVC 2019","author":"Liu Yang","year":"2019","unstructured":"Yang Liu , Samuel Albanie , Arsha Nagrani , and Andrew Zisserman . 2019 . Use What You Have: Video retrieval using representations from collaborative experts . In 30th British Machine Vision Conference 2019, BMVC 2019 , Cardiff, UK, September 9--12 , 2019. BMVA Press, 279. https:\/\/bmvc2019.org\/wp-content\/uploads\/papers\/0363-paper.pdf Yang Liu, Samuel Albanie, Arsha Nagrani, and Andrew Zisserman. 2019. Use What You Have: Video retrieval using representations from collaborative experts. In 30th British Machine Vision Conference 2019, BMVC 2019, Cardiff, UK, September 9--12, 2019. BMVA Press, 279. https:\/\/bmvc2019.org\/wp-content\/uploads\/papers\/0363-paper.pdf"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3078971.3079041"},{"key":"e_1_3_2_2_28_1","volume-title":"Learning a text-video embedding from incomplete and heterogeneous data. arXiv preprint arXiv:1804.02516","author":"Miech Antoine","year":"2018","unstructured":"Antoine Miech , Ivan Laptev , and Josef Sivic . 2018. Learning a text-video embedding from incomplete and heterogeneous data. arXiv preprint arXiv:1804.02516 ( 2018 ). Antoine Miech, Ivan Laptev, and Josef Sivic. 2018. Learning a text-video embedding from incomplete and heterogeneous data. arXiv preprint arXiv:1804.02516 (2018)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00272"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3206025.3206064"},{"key":"e_1_3_2_2_31_1","unstructured":"Phuong Anh Nguyen Qing Li Zhi-Qi Cheng Yi-Jie Lu Hao Zhang Xiao Wu and Chong-Wah Ngo. 2017. VIREO@ TRECVID 2017: Video-to-Text Ad-hoc Video Search and Video hyperlinking.. In TRECVID.  Phuong Anh Nguyen Qing Li Zhi-Qi Cheng Yi-Jie Lu Hao Zhang Xiao Wu and Chong-Wah Ngo. 2017. VIREO@ TRECVID 2017: Video-to-Text Ad-hoc Video Search and Video hyperlinking.. In TRECVID."},{"key":"e_1_3_2_2_32_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021","author":"Patrick Mandela","year":"2021","unstructured":"Mandela Patrick , Po-Yao Huang , Yuki Markus Asano , Florian Metze , Alexander G. Hauptmann , Jo a o F. Henriques , and Andrea Vedaldi . 2021 . Support-set bottlenecks for video-text representation learning . In 9th International Conference on Learning Representations, ICLR 2021 , Virtual Event, Austria, May 3--7 , 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=EqoXe2zmhrh Mandela Patrick, Po-Yao Huang, Yuki Markus Asano, Florian Metze, Alexander G. Hauptmann, Jo a o F. Henriques, and Andrea Vedaldi. 2021. Support-set bottlenecks for video-text representation learning. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=EqoXe2zmhrh"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413954"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3059923"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413618"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/3298023.3298212"},{"key":"e_1_3_2_2_37_1","unstructured":"Kazuya Ueki Koji Hirakawa Kotaro Kikuchi Tetsuji Ogawa and Tetsunori Kobayashi. 2017. Waseda_Meisei at TRECVID 2017: Ad-hoc Video Search.. In TRECVID.  Kazuya Ueki Koji Hirakawa Kotaro Kikuchi Tetsuji Ogawa and Tetsunori Kobayashi. 2017. Waseda_Meisei at TRECVID 2017: Ad-hoc Video Search.. In TRECVID."},{"key":"e_1_3_2_2_38_1","volume-title":"6th International Conference on Learning Representations, ICLR","author":"Velickovic Petar","year":"2018","unstructured":"Petar Velickovic , Guillem Cucurull , Arantxa Casanova , Adriana Romero , Pietro Li\u00f2 , and Yoshua Bengio . 2018. Graph Attention Networks . In 6th International Conference on Learning Representations, ICLR 2018 , Vancouver, BC , Canada, April 30 - May 3, 2018, Conference Track Proceedings. OpenReview .net. https:\/\/openreview.net\/forum?id=rJXMpikCZ Petar Velickovic, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro Li\u00f2, and Yoshua Bengio. 2018. Graph Attention Networks. In 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings. OpenReview.net. https:\/\/openreview.net\/forum?id=rJXMpikCZ"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00504"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3088863"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00054"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413916"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01136"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401151"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_29"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00877"}],"event":{"name":"MM '22: The 30th ACM International Conference on Multimedia","location":"Lisboa Portugal","acronym":"MM '22","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 30th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503161.3547785","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3503161.3547785","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:30:41Z","timestamp":1750188641000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503161.3547785"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,10]]},"references-count":47,"alternative-id":["10.1145\/3503161.3547785","10.1145\/3503161"],"URL":"https:\/\/doi.org\/10.1145\/3503161.3547785","relation":{},"subject":[],"published":{"date-parts":[[2022,10,10]]},"assertion":[{"value":"2022-10-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}