{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,12]],"date-time":"2026-02-12T17:28:13Z","timestamp":1770917293936,"version":"3.50.1"},"reference-count":88,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/tpami.2022.3178485","type":"journal-article","created":{"date-parts":[[2022,5,27]],"date-time":"2022-05-27T21:02:37Z","timestamp":1653685357000},"page":"1-1","source":"Crossref","is-referenced-by-count":5,"title":["Efficient Image and Sentence Matching"],"prefix":"10.1109","author":[{"given":"Yan","family":"Huang","sequence":"first","affiliation":[{"name":"Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratoryof Pattern Recognition (NLPR), the Center for Excellence in Brain Science and Intelligence Technology (CEBSIT), Institute of Automation, Chinese Academy of Sciences (CASIA), and the School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuming","family":"Wang","sequence":"additional","affiliation":[{"name":"Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratoryof Pattern Recognition (NLPR), the Center for Excellence in Brain Science and Intelligence Technology (CEBSIT), Institute of Automation, Chinese Academy of Sciences (CASIA), and the School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liang","family":"Wang","sequence":"additional","affiliation":[{"name":"Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratoryof Pattern Recognition (NLPR), the Center for Excellence in Brain Science and Intelligence Technology (CEBSIT), Institute of Automation, Chinese Academy of Sciences (CASIA), and the School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/S0042-6989(97)00121-1"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/341"},{"key":"ref4","first-page":"742","article-title":"Learning efficient object detection models with knowledge distillation","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Chen"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01267"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01553"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"ref8","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"Chung","year":"2014"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01138"},{"key":"ref10","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref11","first-page":"231","article-title":"A unified Bias-variance decomposition","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Domingos"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00667"},{"key":"ref13","article-title":"VSE++: Improved visual-semantic embeddings","author":"Faghri","year":"2017"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00425"},{"key":"ref15","first-page":"2121","article-title":"DeViSE: A deep visual-semantic embedding model","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Frome"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00473"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00750"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref19","article-title":"Distilling the knowledge in a neural network","author":"Hinton","year":"2015"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00140"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018489"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3052490"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00587"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.767"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00645"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2883466"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"ref29","first-page":"1889","article-title":"Deep fragment embeddings for bidirectional image sentence mapping","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Karpathy"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1080\/00031305.2016.1277159"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1139"},{"key":"ref32","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014"},{"key":"ref33","article-title":"Unifying visual-semantic embeddings with multimodal neural language models","author":"Kiros","year":"2014"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref35","article-title":"ALBERT: A lite BERT for self-supervised learning of language representations","author":"Lan","year":"2019"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_50"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6795"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00475"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_8"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01093"},{"key":"ref43","article-title":"DARTS: Differentiable architecture search","author":"Liu","year":"2018"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00271"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00726"},{"key":"ref46","article-title":"ViLBERT: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks","author":"Lu","year":"2019"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01045"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5963"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01074"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107351"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00409"},{"key":"ref53","first-page":"8024","article-title":"PyTorch: An imperative style, high-performance deep learning library","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Paszke"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00511"},{"key":"ref55","article-title":"Cream of the crop: Distilling prioritized paths for one-shot neural architecture search","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Peng"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2007.383266"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.303"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413961"},{"key":"ref59","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"ref61","first-page":"91","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Ren"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1238"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/720"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01280"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.195"},{"key":"ref66","article-title":"EfficientNet: Rethinking model scaling for convolutional neural networks","author":"Tan","year":"2019"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00145"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00871"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2797921"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.541"},{"key":"ref71","first-page":"1","article-title":"MiniLM: Deep self-attention distillation for task-agnostic compression of pre-trained transformers","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/526"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00586"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6915"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00590"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01095"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3030656"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00128"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350940"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58595-2_40"},{"key":"ref82","article-title":"Billion-scale semi-supervised learning for image classification","author":"Yalniz","year":"2019"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298966"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00302"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01389"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00381"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00359"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/4359286\/09783034.pdf?arnumber=9783034","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T01:53:50Z","timestamp":1706752430000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9783034\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":88,"URL":"https:\/\/doi.org\/10.1109\/tpami.2022.3178485","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]}}}