{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T03:26:42Z","timestamp":1779247602201,"version":"3.51.4"},"reference-count":78,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2023,5,1]],"date-time":"2023-05-01T00:00:00Z","timestamp":1682899200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,5,1]],"date-time":"2023-05-01T00:00:00Z","timestamp":1682899200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,1]],"date-time":"2023-05-01T00:00:00Z","timestamp":1682899200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61906050"],"award-info":[{"award-number":["61906050"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62171252"],"award-info":[{"award-number":["62171252"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61701245"],"award-info":[{"award-number":["61701245"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62071272"],"award-info":[{"award-number":["62071272"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61701247"],"award-info":[{"award-number":["61701247"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2018AAA0102600"],"award-info":[{"award-number":["2018AAA0102600"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020AAA0130000"],"award-info":[{"award-number":["2020AAA0130000"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"Post-Doctoral Science Foundation of China","doi-asserted-by":"publisher","award":["2021M701903"],"award-info":[{"award-number":["2021M701903"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"Post-Doctoral Science Foundation of China","doi-asserted-by":"publisher","award":["2022M710467"],"award-info":[{"award-number":["2022M710467"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Video Technol."],"published-print":{"date-parts":[[2023,5]]},"DOI":"10.1109\/tcsvt.2022.3221611","type":"journal-article","created":{"date-parts":[[2022,11,10]],"date-time":"2022-11-10T20:38:03Z","timestamp":1668112683000},"page":"2275-2289","source":"Crossref","is-referenced-by-count":32,"title":["Embedding Global Contrastive and Local Location in Self-Supervised Learning"],"prefix":"10.1109","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8339-5081","authenticated-orcid":false,"given":"Wenyi","family":"Zhao","sequence":"first","affiliation":[{"name":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2609-2460","authenticated-orcid":false,"given":"Chongyi","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanyang Technological University, Jurong West, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2495-4469","authenticated-orcid":false,"given":"Weidong","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Henan Institute of Science and Technology, Xinxiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3857-3982","authenticated-orcid":false,"given":"Lu","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7143-9569","authenticated-orcid":false,"given":"Peixian","family":"Zhuang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Knowledge Automation for Industrial Processes, Ministry of Education, School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9402-0421","authenticated-orcid":false,"given":"Lingqiao","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Information Security, Guilin University of Electronic Technology, Guilin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kefeng","family":"Fan","sequence":"additional","affiliation":[{"name":"Chinese Electronics Standardization Institute, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6334-4044","authenticated-orcid":false,"given":"Huihua","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Improved baselines with momentum contrastive learning","author":"Chen","year":"2020","journal-title":"arXiv:2003.04297"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3003048"},{"key":"ref5","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Chen"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00304"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3169145"},{"key":"ref8","first-page":"9912","article-title":"Unsupervised learning of visual features by contrasting cluster assignments","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Caron"},{"key":"ref9","first-page":"21271","article-title":"Bootstrap your own latent\u2014A new approach to self-supervised learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Grill"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3169469"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01136"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3124908"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3080928"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2022.108168"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01091"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01556"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00948"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01608"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3135470"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/91.236554"},{"key":"ref22","first-page":"12638","article-title":"Joint contrastive learning with infinite possibilities","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Cai"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00120"},{"key":"ref24","article-title":"RegionCL: Can simple region swapping contribute to contrastive learning?","author":"Xu","year":"2021","journal-title":"arXiv:2111.12309"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01061"},{"key":"ref26","first-page":"1","article-title":"Unsupervised representation learning by predicting image rotations","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Gidaris"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"ref28","article-title":"Representation learning with contrastive predictive coding","author":"Van Den Oord","year":"2018","journal-title":"arXiv:1807.03748"},{"key":"ref29","first-page":"1849","article-title":"Improved deep metric learning with multi-class N-pair loss objective","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Sohn"},{"key":"ref30","first-page":"18661","article-title":"Supervised contrastive learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Khosla"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.626"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58607-2_16"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3141051"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3004453"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01405"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-014-0733-5"},{"key":"ref40","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2012"},{"key":"ref41","first-page":"215","article-title":"An analysis of single-layer networks in unsupervised feature learning","volume-title":"Proc. 14th Int. Conf. Artif. Intell. Statist. (AISTATS)","author":"Coates"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00264"},{"key":"ref43","article-title":"Microsoft COCO captions: Data collection and evaluation server","author":"Chen","year":"2015","journal-title":"arXiv:1504.00325"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3075607"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3063604"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3029901"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_40"},{"key":"ref48","first-page":"1","article-title":"Prototypical contrastive learning of unsupervised representations","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Li"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01000"},{"key":"ref50","first-page":"12310","article-title":"Barlow Twins: Self-supervised learning via redundancy reduction","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","volume":"139","author":"Zbontar"},{"key":"ref51","first-page":"1","article-title":"What makes instance discrimination good for transfer learning?","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Zhao"},{"key":"ref52","first-page":"1","article-title":"Self-labelling via simultaneous clustering and representation learning","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Asano"},{"key":"ref53","volume-title":"MMSelfSup: Openmmlab Self-Supervised Learning Toolbox and Benchmark","author":"Contributors","year":"2021"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2969255"},{"key":"ref56","article-title":"Feature pyramid networks for object detection","author":"Lin","year":"2016","journal-title":"arXiv:1612.03144"},{"key":"ref57","volume-title":"Detectron2","author":"Wu","year":"2019"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00393"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00156"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00674"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3068749"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00075"},{"key":"ref63","first-page":"4175","article-title":"Balanced meta-softmax for long-tailed visual recognition","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Ren"},{"key":"ref64","first-page":"1","article-title":"Decoupling representation and classifier for long-tailed recognition","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Kang"},{"key":"ref65","first-page":"1","article-title":"Long-tailed recognition by routing diverse distribution-aware experts","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Wang"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00239"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00963"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00914"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00550"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00682"},{"key":"ref72","article-title":"Masked autoencoders are scalable vision learners","author":"He","year":"2021","journal-title":"arXiv:2111.06377"},{"key":"ref73","article-title":"How to understand masked autoencoders","author":"Cao","year":"2022","journal-title":"arXiv:2202.03670"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01426"},{"key":"ref75","first-page":"20026","article-title":"Adversarial masking for self-supervised learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","volume":"162","author":"Shi"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19821-2_26"},{"key":"ref77","first-page":"1","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Dosovitskiy"},{"key":"ref78","article-title":"Multimodal learning with transformers: A survey","author":"Xu","year":"2022","journal-title":"arXiv:2206.06488"}],"container-title":["IEEE Transactions on Circuits and Systems for Video Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/76\/10116045\/09946001.pdf?arnumber=9946001","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T03:09:58Z","timestamp":1706756998000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9946001\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5]]},"references-count":78,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tcsvt.2022.3221611","relation":{},"ISSN":["1051-8215","1558-2205"],"issn-type":[{"value":"1051-8215","type":"print"},{"value":"1558-2205","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,5]]}}}