{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T03:35:15Z","timestamp":1775446515002,"version":"3.50.1"},"reference-count":66,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1704337"],"award-info":[{"award-number":["IIS-1704337"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1722847"],"award-info":[{"award-number":["IIS-1722847"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1813709"],"award-info":[{"award-number":["IIS-1813709"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022,12,1]]},"DOI":"10.1109\/tpami.2021.3120745","type":"journal-article","created":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T18:42:13Z","timestamp":1634755333000},"page":"9073-9087","source":"Crossref","is-referenced-by-count":53,"title":["Multi-Scale 2D Temporal Adjacency Networks for Moment Localization With Natural Language"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4316-3320","authenticated-orcid":false,"given":"Songyang","family":"Zhang","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of Rochester, Rochester, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8544-8952","authenticated-orcid":false,"given":"Houwen","family":"Peng","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1025-2012","authenticated-orcid":false,"given":"Jianlong","family":"Fu","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"given":"Yijuan","family":"Lu","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4516-9729","authenticated-orcid":false,"given":"Jiebo","family":"Luo","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Rochester, Rochester, NY, USA"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6924"},{"key":"ref38","article-title":"Tripping through time: Efficient localization of activities in videos","author":"hahn","year":"2019","journal-title":"Proc IEEE\/CVF Conf Comput Vis Pattern Recognit Workshop"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6627"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093328"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01082"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240549"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00042"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018393"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3323873.3325019"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1518"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref62","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_31"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/143"},{"key":"ref28","first-page":"1984","article-title":"ExCL: Extractive clip localization using natural language descriptions","author":"ghosh","year":"2019","journal-title":"Proc Conf North Amer Chapter Assoc Comput Linguistics"},{"key":"ref64","first-page":"91","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"Proc Conf Neural Inf Process Syst"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019159"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1168"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350879"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01030"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2959977"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.317"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-00767-6_32"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123343"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6984"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331235"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1015"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.585"},{"key":"ref25","first-page":"534","article-title":"Semantic conditioned dynamic modulation for temporal sentence grounding in videos","author":"yuan","year":"2019","journal-title":"Proc Conf Neural Inf Process Syst"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref56","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33718-5_11"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00207"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.83"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00457"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.618"},{"key":"ref11","article-title":"Temporal localization of moments in video collections with natural language","author":"escorcia","year":"2019"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019062"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00418"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2965987"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1167"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_13"},{"key":"ref16","first-page":"1916","article-title":"Weakly supervised dense event captioning in videos","author":"duan","year":"2018","journal-title":"Proc Conf Neural Inf Process Syst"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1117"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00032"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210003"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01017"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2922108"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.86"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00333"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298981"},{"key":"ref7","first-page":"5179","article-title":"TVSum: Summarizing web videos using titles","author":"song","year":"2015","journal-title":"Proc IEEE Conf Comput Vis and Pattern Recog"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6815"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.563"},{"key":"ref46","article-title":"Longformer: The long-document transformer","author":"beltagy","year":"2020"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6897"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00399"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1032"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00134"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018199"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_34"},{"key":"ref43","first-page":"15387","article-title":"Understanding the representation power of graph neural networks in learning graph topology","author":"dehmamy","year":"2019","journal-title":"Proc Conf Neural Inf Process Syst"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/34\/9940447\/9580623-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9940447\/09580623.pdf?arnumber=9580623","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,12]],"date-time":"2023-07-12T17:22:19Z","timestamp":1689182539000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9580623\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,1]]},"references-count":66,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2021.3120745","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,1]]}}}