{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,11]],"date-time":"2026-01-11T02:13:43Z","timestamp":1768097623595,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T00:00:00Z","timestamp":1656288000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Zhejiang Lab","award":["2022NB0AB05"],"award-info":[{"award-number":["2022NB0AB05"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,27]]},"DOI":"10.1145\/3512527.3531382","type":"proceedings-article","created":{"date-parts":[[2022,6,23]],"date-time":"2022-06-23T22:23:32Z","timestamp":1656023012000},"page":"360-368","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Phrase-level Prediction for Video Temporal Localization"],"prefix":"10.1145","author":[{"given":"Sizhe","family":"Li","sequence":"first","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chang","family":"Li","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Minghang","family":"Zheng","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Liu","sequence":"additional","affiliation":[{"name":"Peking University &amp; Beijing Institute for General Artificial Intelligence, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,6,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0966-6"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0966-6"},{"key":"e_1_3_2_2_3_1","volume-title":"Video Surveillance for Road Traffic Monitoring. ArXiv abs\/2105.04908","author":"Albacar Pol","year":"2021","unstructured":"Pol Albacar , Oscar Lorente , Eduard Mainou , and Ian Riera . 2021. Video Surveillance for Road Traffic Monitoring. ArXiv abs\/2105.04908 ( 2021 ). Pol Albacar, Oscar Lorente, Eduard Mainou, and Ian Riera. 2021. Video Surveillance for Road Traffic Monitoring. ArXiv abs\/2105.04908 (2021)."},{"key":"e_1_3_2_2_4_1","volume-title":"Yu","author":"Chen Jianguo","year":"2019","unstructured":"Jianguo Chen , Kenli Li , Qingying Deng , Keqin Li , and Philip S . Yu . 2019 . Distributed Deep Learning Model for Intelligent Video Surveillance Systems with Edge Computing. ArXiv abs\/1904.06400 (2019). Jianguo Chen, Kenli Li, Qingying Deng, Keqin Li, and Philip S. Yu. 2019. Distributed Deep Learning Model for Intelligent Video Surveillance Systems with Edge Computing. ArXiv abs\/1904.06400 (2019)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_36"},{"key":"e_1_3_2_2_6_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL.","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2019 . BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL. Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL."},{"key":"e_1_3_2_2_7_1","volume-title":"Weak Supervision and Referring Attention for Temporal-Textual Association Learning. arXiv preprint arXiv:2006.11747","author":"Fang Zhiyuan","year":"2020","unstructured":"Zhiyuan Fang , Shu Kong , Zhe Wang , Charless Fowlkes , and Yezhou Yang . 2020. Weak Supervision and Referring Attention for Temporal-Textual Association Learning. arXiv preprint arXiv:2006.11747 ( 2020 ). Zhiyuan Fang, Shu Kong, Zhe Wang, Charless Fowlkes, and Yezhou Yang. 2020. Weak Supervision and Referring Attention for Temporal-Textual Association Learning. arXiv preprint arXiv:2006.11747 (2020)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.563"},{"key":"e_1_3_2_2_9_1","volume-title":"Relationaware Video Reading Comprehension for Temporal Language Grounding. ArXiv abs\/2110.05717","author":"Gao Jialin","year":"2021","unstructured":"Jialin Gao , Xin Sun , Mengmeng Xu , Xi Zhou , and Bernard Ghanem . 2021. Relationaware Video Reading Comprehension for Temporal Language Grounding. ArXiv abs\/2110.05717 ( 2021 ). Jialin Gao, Xin Sun, Mengmeng Xu, Xi Zhou, and Bernard Ghanem. 2021. Relationaware Video Reading Comprehension for Temporal Language Grounding. ArXiv abs\/2110.05717 (2021)."},{"key":"e_1_3_2_2_10_1","volume-title":"Manning","author":"Hudson Drew A.","year":"2018","unstructured":"Drew A. Hudson and Christopher D . Manning . 2018 . Compositional Attention Networks for Machine Reasoning. ArXiv abs\/1803.03067 (2018). Drew A. Hudson and Christopher D. Manning. 2018. Compositional Attention Networks for Machine Reasoning. ArXiv abs\/1803.03067 (2018)."},{"key":"e_1_3_2_2_11_1","volume-title":"Hadamard product for low-rank bilinear pooling. arXiv preprint arXiv:1610.04325","author":"Kim Jin-Hwa","year":"2016","unstructured":"Jin-Hwa Kim , Kyoung-Woon On , Woosang Lim , Jeonghee Kim , Jung-Woo Ha , and Byoung-Tak Zhang . 2016. Hadamard product for low-rank bilinear pooling. arXiv preprint arXiv:1610.04325 ( 2016 ). Jin-Hwa Kim, Kyoung-Woon On, Woosang Lim, Jeonghee Kim, Jung-Woo Ha, and Byoung-Tak Zhang. 2016. Hadamard product for low-rank bilinear pooling. arXiv preprint arXiv:1610.04325 (2016)."},{"key":"e_1_3_2_2_12_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2015","unstructured":"Diederik P. Kingma and Jimmy Ba . 2015 . Adam : A Method for Stochastic Optimization. CoRR abs\/1412.6980 (2015). Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. CoRR abs\/1412.6980 (2015)."},{"key":"e_1_3_2_2_13_1","volume-title":"Dense-Captioning Events in Videos. In International Conference on Computer Vision (ICCV).","author":"Krishna Ranjay","year":"2017","unstructured":"Ranjay Krishna , Kenji Hata , Frederic Ren , Li Fei-Fei , and Juan Carlos Niebles . 2017 . Dense-Captioning Events in Videos. In International Conference on Computer Vision (ICCV). Ranjay Krishna, Kenji Hata, Frederic Ren, Li Fei-Fei, and Juan Carlos Niebles. 2017. Dense-Captioning Events in Videos. In International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_2_2_14_1","volume-title":"Mo Yu, Bing Xiang, Bowen Zhou, and Yoshua Bengio.","author":"Lin Zhouhan","year":"2017","unstructured":"Zhouhan Lin , Minwei Feng , C\u00edcero Nogueira dos Santos , Mo Yu, Bing Xiang, Bowen Zhou, and Yoshua Bengio. 2017 . A Structured Self-attentive Sentence Embedding. ArXiv abs\/1703.03130 (2017). Zhouhan Lin, Minwei Feng, C\u00edcero Nogueira dos Santos, Mo Yu, Bing Xiang, Bowen Zhou, and Yoshua Bengio. 2017. A Structured Self-attentive Sentence Embedding. ArXiv abs\/1703.03130 (2017)."},{"key":"e_1_3_2_2_15_1","unstructured":"Tomas Mikolov Kai Chen Gregory S. Corrado and Jeffrey Dean. 2013. Efficient Estimation of Word Representations in Vector Space. In ICLR.  Tomas Mikolov Kai Chen Gregory S. Corrado and Jeffrey Dean. 2013. Efficient Estimation of Word Representations in Vector Space. In ICLR."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01186"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01082"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3414053"},{"key":"e_1_3_2_2_19_1","volume-title":"Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. ArXiv abs\/1908.10084","author":"Reimers Nils","year":"2019","unstructured":"Nils Reimers and Iryna Gurevych . 2019. Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. ArXiv abs\/1908.10084 ( 2019 ). Nils Reimers and Iryna Gurevych. 2019. Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. ArXiv abs\/1908.10084 (2019)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00112"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW52041.2021.00009"},{"key":"e_1_3_2_2_22_1","volume-title":"Proceedings of the 27th International Conference on Computational Linguistics. Association for Computational Linguistics","author":"Shimizu Nobuyuki","year":"2018","unstructured":"Nobuyuki Shimizu , Na Rong , and Takashi Miyazaki . 2018 . Visual Question Answering Dataset for Bilingual Image Understanding: A Study of Cross- Lingual Transfer Using Attention Maps . In Proceedings of the 27th International Conference on Computational Linguistics. Association for Computational Linguistics , Santa Fe, New Mexico, USA , 1918--1928. https:\/\/aclanthology.org\/C18- 1163 Nobuyuki Shimizu, Na Rong, and Takashi Miyazaki. 2018. Visual Question Answering Dataset for Bilingual Image Understanding: A Study of Cross- Lingual Transfer Using Attention Maps. In Proceedings of the 27th International Conference on Computational Linguistics. Association for Computational Linguistics, Santa Fe, New Mexico, USA, 1918--1928. https:\/\/aclanthology.org\/C18- 1163"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413975"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00695"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475278"},{"key":"e_1_3_2_2_26_1","volume-title":"Negative Sample Matters: A Renaissance of Metric Learning for Temporal Grounding. CoRR abs\/2109.04872","author":"Wang Zhenzhi","year":"2021","unstructured":"Zhenzhi Wang , Limin Wang , Tao Wu , Tianhao Li , and Gangshan Wu. 2021. Negative Sample Matters: A Renaissance of Metric Learning for Temporal Grounding. CoRR abs\/2109.04872 ( 2021 ). Zhenzhi Wang, Limin Wang, Tao Wu, Tianhao Li, and Gangshan Wu. 2021. Negative Sample Matters: A Renaissance of Metric Learning for Temporal Grounding. CoRR abs\/2109.04872 (2021)."},{"key":"e_1_3_2_2_27_1","volume-title":"Negative Sample Matters: A Renaissance of Metric Learning for Temporal Grounding. ArXiv abs\/2109.04872","author":"Wang Zhenzhi","year":"2021","unstructured":"Zhenzhi Wang , Limin Wang , Tao Wu , Tianhao Li , and Gangshan Wu. 2021. Negative Sample Matters: A Renaissance of Metric Learning for Temporal Grounding. ArXiv abs\/2109.04872 ( 2021 ). Zhenzhi Wang, Limin Wang, Tao Wu, Tianhao Li, and Gangshan Wu. 2021. Negative Sample Matters: A Renaissance of Metric Learning for Temporal Grounding. ArXiv abs\/2109.04872 (2021)."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i4.16406"},{"key":"e_1_3_2_2_29_1","volume-title":"Dynamic Graph Attention for Referring Expression Comprehension. 2019 IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Yang Sibei","year":"2019","unstructured":"Sibei Yang , Guanbin Li , and Yizhou Yu . 2019 . Dynamic Graph Attention for Referring Expression Comprehension. 2019 IEEE\/CVF International Conference on Computer Vision (ICCV) (2019), 4643--4652. Sibei Yang, Guanbin Li, and Yizhou Yu. 2019. Dynamic Graph Attention for Referring Expression Comprehension. 2019 IEEE\/CVF International Conference on Computer Vision (ICCV) (2019), 4643--4652."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"crossref","unstructured":"Yitian Yuan Tao Mei and Wenwu Zhu. 2019. To Find Where You Talk: Temporal Sentence Localization in Video with Attention Based Location Regression. In AAAI.  Yitian Yuan Tao Mei and Wenwu Zhu. 2019. To Find Where You Talk: Temporal Sentence Localization in Video with Attention Based Location Regression. In AAAI.","DOI":"10.1609\/aaai.v33i01.33019159"},{"key":"e_1_3_2_2_31_1","volume-title":"Span-based localizing network for natural language video localization. arXiv preprint arXiv:2004.13931","author":"Zhang Hao","year":"2020","unstructured":"Hao Zhang , Aixin Sun , Wei Jing , and Joey Tianyi Zhou . 2020. Span-based localizing network for natural language video localization. arXiv preprint arXiv:2004.13931 ( 2020 ). Hao Zhang, Aixin Sun, Wei Jing, and Joey Tianyi Zhou. 2020. Span-based localizing network for natural language video localization. arXiv preprint arXiv:2004.13931 (2020)."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01248"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6984"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3113791"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00418"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01511"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00834"}],"event":{"name":"ICMR '22: International Conference on Multimedia Retrieval","location":"Newark NJ USA","acronym":"ICMR '22","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2022 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3512527.3531382","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3512527.3531382","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:30:12Z","timestamp":1750188612000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3512527.3531382"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,27]]},"references-count":37,"alternative-id":["10.1145\/3512527.3531382","10.1145\/3512527"],"URL":"https:\/\/doi.org\/10.1145\/3512527.3531382","relation":{},"subject":[],"published":{"date-parts":[[2022,6,27]]},"assertion":[{"value":"2022-06-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}