{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,16]],"date-time":"2026-07-16T15:55:16Z","timestamp":1784217316794,"version":"3.55.0"},"reference-count":60,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62525103"],"award-info":[{"award-number":["62525103"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62021002"],"award-info":[{"award-number":["62021002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62571294"],"award-info":[{"award-number":["62571294"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62441235"],"award-info":[{"award-number":["62441235"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Natural Science Foundation","award":["L252009"],"award-info":[{"award-number":["L252009"]}]},{"name":"Beijing Natural Science Foundation","award":["L257005"],"award-info":[{"award-number":["L257005"]}]},{"name":"CCF-DiDi GAIA Collaborative Research Funds"},{"name":"Postdoctoral Science Foundation, China","award":["2024M750565"],"award-info":[{"award-number":["2024M750565"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tnnls.2025.3605657","type":"journal-article","created":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T17:32:58Z","timestamp":1757439178000},"page":"17527-17540","source":"Crossref","is-referenced-by-count":4,"title":["Temporal Modeling With Frozen Vision\u2013Language Foundation Models for Parameter-Efficient Text\u2013Video Retrieval"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7742-9142","authenticated-orcid":false,"given":"Leqi","family":"Shen","sequence":"first","affiliation":[{"name":"Beijing National Research Center for Information Science and Technology (BNRist) and the School of Software, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1952-6083","authenticated-orcid":false,"given":"Tianxiang","family":"Hao","sequence":"additional","affiliation":[{"name":"Beijing National Research Center for Information Science and Technology (BNRist) and the School of Software, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tao","family":"He","sequence":"additional","affiliation":[{"name":"GRG Banking Equipment Company Ltd., Guangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yifeng","family":"Zhang","sequence":"additional","affiliation":[{"name":"JD.com Inc., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pengzhang","family":"Liu","sequence":"additional","affiliation":[{"name":"JD.com Inc., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5843-6411","authenticated-orcid":false,"given":"Sicheng","family":"Zhao","sequence":"additional","affiliation":[{"name":"Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4361-956X","authenticated-orcid":false,"given":"Jungong","family":"Han","sequence":"additional","affiliation":[{"name":"Department of Automation, Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0137-9975","authenticated-orcid":false,"given":"Guiguang","family":"Ding","sequence":"additional","affiliation":[{"name":"Beijing National Research Center for Information Science and Technology (BNRist) and the School of Software, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.07.028"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00495"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19781-9_19"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01434"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00264"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01025"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00244"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01820"},{"key":"ref9","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"139","author":"Radford"},{"key":"ref10","first-page":"11285","article-title":"TinyTL: Reduce memory, not parameters for efficient on-device learning","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Cai"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"ref12","article-title":"Towards a unified view of parameter-efficient transfer learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"He"},{"key":"ref13","first-page":"16664","article-title":"AdaptFormer: Adapting vision transformers for scalable visual recognition","volume-title":"Proc. NIPS","author":"Chen"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00635"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_7"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"key":"ref20","article-title":"Clip-vip: Adapting pre-trained image-text model to video-language alignment","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Xue"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.83"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.618"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24947-6_17"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3214208"},{"key":"ref26","article-title":"Learning language-visual embedding for movie understanding with natural-language","author":"Torabi","year":"2016","journal-title":"arXiv:1609.08124"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_29"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01065"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3059295"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2017.347"},{"key":"ref31","article-title":"FastVID: Dynamic density pruning for fast video large language models","author":"Shen","year":"2025","journal-title":"arXiv:2503.11187"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2997020"},{"key":"ref33","article-title":"Tempme: Video temporal token merging for efficient text-video retrieval","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Shen"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3715137"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01835"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3331841"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3335859"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2024.3458898"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2024.3381347"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.5555\/3045118.3045336"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01566"},{"key":"ref44","first-page":"3882","article-title":"Diffusion-inspired truncated sampler for text-video retrieval","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"37","author":"Wang"},{"key":"ref45","article-title":"Consolidator: Mergable adapter with group connections for visual adaptation","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Hao"},{"key":"ref46","first-page":"109","article-title":"Scaling & shifting your features: A new baseline for efficient model tuning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Lian"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02563"},{"key":"ref48","article-title":"Representation learning with contrastive predictive coding","author":"van den Oord","year":"2018","journal-title":"arXiv:1807.03748"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"ref50","article-title":"Improving video-text retrieval by multi-stream corpus alignment and dual softmax loss","author":"Cheng","year":"2021","journal-title":"arXiv:2109.04290"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00513"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00043"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_13"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00272"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_23"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00725"},{"key":"ref58","article-title":"SGDR: Stochastic gradient descent with warm restarts","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Loshchilov"},{"key":"ref59","article-title":"Accurate, large minibatch SGD: Training ImageNet in 1 hour","author":"Goyal","year":"2017","journal-title":"arXiv:1706.02677"},{"key":"ref60","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","volume-title":"Proc. 13th Int. Conf. Artif. Intell. Statist.","author":"Glorot"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5962385\/11195929\/11153787.pdf?arnumber=11153787","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T18:41:36Z","timestamp":1765219296000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11153787\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":60,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2025.3605657","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}