{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,5]],"date-time":"2026-07-05T04:40:34Z","timestamp":1783226434095,"version":"3.54.6"},"reference-count":65,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276120"],"award-info":[{"award-number":["62276120"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100019410","name":"Yunnan Fundamental Research Projects","doi-asserted-by":"publisher","award":["202301AV070004"],"award-info":[{"award-number":["202301AV070004"]}],"id":[{"id":"10.13039\/501100019410","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100019410","name":"Yunnan Fundamental Research Projects","doi-asserted-by":"publisher","award":["202401AS070106"],"award-info":[{"award-number":["202401AS070106"]}],"id":[{"id":"10.13039\/501100019410","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. on Image Process."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tip.2025.3630883","type":"journal-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:52:02Z","timestamp":1763146322000},"page":"7558-7571","source":"Crossref","is-referenced-by-count":2,"title":["Disentangling Inter- and Intra-Video Relations for Multi-Event Video-Text Retrieval and Grounding"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-5717-5290","authenticated-orcid":false,"given":"Mengzhao","family":"Wang","sequence":"first","affiliation":[{"name":"Faculty of Information Engineering and Automation and the Key Laboratory of Artificial Intelligence in Yunnan Province, Kunming University of Science and Technology, Kunming, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2462-6174","authenticated-orcid":false,"given":"Huafeng","family":"Li","sequence":"additional","affiliation":[{"name":"Faculty of Information Engineering and Automation and the Key Laboratory of Artificial Intelligence in Yunnan Province, Kunming University of Science and Technology, Kunming, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2347-5642","authenticated-orcid":false,"given":"Yafei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Faculty of Information Engineering and Automation and the Key Laboratory of Artificial Intelligence in Yunnan Province, Kunming University of Science and Technology, Kunming, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5156-0305","authenticated-orcid":false,"given":"Jinxing","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0783-5273","authenticated-orcid":false,"given":"Dapeng","family":"Tao","sequence":"additional","affiliation":[{"name":"FIST LAB, School of Information Science and Engineering, Yunnan University, Kunming, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4012-461X","authenticated-orcid":false,"given":"Zhengtao","family":"Yu","sequence":"additional","affiliation":[{"name":"Faculty of Information Engineering and Automation and the Key Laboratory of Artificial Intelligence in Yunnan Province, Kunming University of Science and Technology, Kunming, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531950"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00495"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19781-9_19"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02021"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3275071"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3048680"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3527369"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350906"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3140611"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.116850"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3346997"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s13735-022-00227-8"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2023.3263725"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2025.3535345"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2025.3565981"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.287"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612349"},{"key":"ref18","article-title":"Learning language-visual embedding for movie understanding with natural-language","author":"Torabi","year":"2016","journal-title":"arXiv:1609.08124"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_29"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.07.028"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475515"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00504"},{"key":"ref23","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"139","author":"Radford"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6984"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01248"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00695"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01082"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3023339"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2965987"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3086591"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01186"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01511"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6820"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20263"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1157"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3058614"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01266"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3168424"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19902"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3120545"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3096087"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00711"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475278"},{"key":"ref44","first-page":"3059","article-title":"Weakly supervised dense event captioning in videos","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Duan"},{"key":"ref45","article-title":"Weakly-supervised multi-level attentional reconstruction network for grounding textual queries in videos","author":"Song","year":"2020","journal-title":"arXiv:2003.07048"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01353"},{"key":"ref47","first-page":"41340","article-title":"Momentor: Advancing video large language model with fine-grained temporal reasoning","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","author":"Long"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i7.32784"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.3390\/app14051894"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2025.3566695"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.5244\/C.30.119"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.618"},{"key":"ref55","article-title":"Finding moments in video collections using natural language","author":"Escorcia","year":"2019","journal-title":"arXiv:1907.12763"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58589-1_27"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3120038"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462874"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547976"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3059295"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3150959"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33718-5_11"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00207"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"}],"container-title":["IEEE Transactions on Image Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/83\/10795784\/11249718.pdf?arnumber=11249718","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T19:02:57Z","timestamp":1764010977000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11249718\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":65,"URL":"https:\/\/doi.org\/10.1109\/tip.2025.3630883","relation":{},"ISSN":["1057-7149","1941-0042"],"issn-type":[{"value":"1057-7149","type":"print"},{"value":"1941-0042","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}