{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,7]],"date-time":"2026-06-07T00:06:32Z","timestamp":1780790792866,"version":"3.54.1"},"reference-count":93,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"U.S. National Institute of Health","award":["1R25EY029127"],"award-info":[{"award-number":["1R25EY029127"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Video Technol."],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1109\/tcsvt.2022.3177320","type":"journal-article","created":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T20:55:07Z","timestamp":1653339307000},"page":"6642-6656","source":"Crossref","is-referenced-by-count":105,"title":["Video Captioning Using Global-Local Representation"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7077-4947","authenticated-orcid":false,"given":"Liqi","family":"Yan","sequence":"first","affiliation":[{"name":"Westlake Institute for Advanced Study, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Siqi","family":"Ma","sequence":"additional","affiliation":[{"name":"School of Engineering, Westlake University, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7570-5756","authenticated-orcid":false,"given":"Qifan","family":"Wang","sequence":"additional","affiliation":[{"name":"Meta AI, Menlo Park, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6705-3535","authenticated-orcid":false,"given":"Yingjie","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Graphics Technology, Purdue University, West Lafayette, IN, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiangyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Computer Graphics Technology, Purdue University, West Lafayette, IN, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9657-3027","authenticated-orcid":false,"given":"Andreas","family":"Savakis","sequence":"additional","affiliation":[{"name":"Department of Computer Engineering, Rochester Institute of Technology, Rochester, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dongfang","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Computer Engineering, Rochester Institute of Technology, Rochester, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.05.027"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00347"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00803"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00969"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240632"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00273"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00751"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2909864"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00901"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00852"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00710"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00674"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3045735"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2988435"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.08.042"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01067"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_25"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00582"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01020"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01329"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01088"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093361"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00487"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00900"},{"key":"ref27","first-page":"190","article-title":"Collecting highly parallel data for paraphrase evaluation","volume-title":"Proc. ACL","author":"Chen"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.337"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-11752-2_15"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.61"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_22"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3063423"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00854"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3131721"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.127"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00013"},{"key":"ref38","first-page":"3063","article-title":"Weakly supervised dense event captioning in videos","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Duan"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.108"},{"key":"ref40","article-title":"CLEVRER: CoLlision events for video REpresentation and reasoning","volume-title":"arXiv:1910.01442","author":"Yi","year":"2019"},{"key":"ref41","article-title":"Grounding physical concepts of objects and events through dynamic visual reasoning","volume-title":"arXiv:2103.16564","author":"Chen","year":"2021"},{"key":"ref42","article-title":"Star: A benchmark for situated reasoning in real-world videos","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst. Datasets Benchmarks Track (Round)","author":"Wu"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.339"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/88"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.111"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018167"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-05710-7_4"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.497"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.512"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351072"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123327"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3058626"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00279"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6704"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.2.270"},{"key":"ref56","first-page":"1171","article-title":"Scheduled sampling for sequence prediction with recurrent neural networks","volume-title":"Proc. NIPS","author":"Bengio"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2020.475767"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1426"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00751"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.131"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00443"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1103"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/98"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/3390891"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.128"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3014606"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CAC.2017.8243510"},{"key":"ref68","article-title":"ConvNet architecture search for spatiotemporal feature learning","volume-title":"arXiv:1708.05038","author":"Tran","year":"2017"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1137"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00854"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref73","first-page":"65","article-title":"METEOR: An automatic metric for MT evaluation with improved correlation with human judgments","volume-title":"Proc. ACL Workshop Intrinsic Extrinsic Eval. Measures Mach. Transl. Summarization","author":"Banerjee"},{"key":"ref74","first-page":"10","article-title":"ROUGE: A package for automatic evaluation of summaries","volume-title":"Text Summarization Branches Out","author":"Lin","year":"2004"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351060"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_43"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref80","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","volume-title":"arXiv:1502.03167","author":"Ioffe","year":"2015"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00155"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00795"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01311"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123448"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2984066"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2984065"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-2125"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1980.1163420"}],"container-title":["IEEE Transactions on Circuits and Systems for Video Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/76\/9910246\/09780119.pdf?arnumber=9780119","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T22:23:41Z","timestamp":1705962221000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9780119\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10]]},"references-count":93,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tcsvt.2022.3177320","relation":{},"ISSN":["1051-8215","1558-2205"],"issn-type":[{"value":"1051-8215","type":"print"},{"value":"1558-2205","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10]]}}}