{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T12:57:01Z","timestamp":1776085021808,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T00:00:00Z","timestamp":1730505600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T00:00:00Z","timestamp":1730505600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["BK20180594"],"award-info":[{"award-number":["BK20180594"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["BK20231036"],"award-info":[{"award-number":["BK20231036"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s00530-024-01458-x","type":"journal-article","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T12:02:00Z","timestamp":1730548920000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Vicsgaze: a gaze estimation method using self-supervised contrastive learning"],"prefix":"10.1007","volume":"30","author":[{"given":"De","family":"Gu","sequence":"first","affiliation":[]},{"given":"Minghao","family":"Lv","sequence":"additional","affiliation":[]},{"given":"Jianchu","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,2]]},"reference":[{"key":"1458_CR1","doi-asserted-by":"crossref","unstructured":"Bao, Y., Cheng, Y., Liu, Y., et\u00a0al.: Adaptive feature fusion network for gaze tracking in mobile tablets. In: 2020 25th International Conference on Pattern Recognition (ICPR), IEEE, pp. 9936\u20139943 (2021)","DOI":"10.1109\/ICPR48806.2021.9412205"},{"key":"1458_CR2","unstructured":"Bardes, A., Ponce, J., LeCun, Y.: VICReg: Variance-invariance-covariance regularization for self-supervised learning. In: International Conference on Learning Representations (2022)"},{"key":"1458_CR3","first-page":"9912","volume":"33","author":"M Caron","year":"2020","unstructured":"Caron, M., Misra, I., Mairal, J., et al.: Unsupervised learning of visual features by contrasting cluster assignments. Adv. Neural Inform. Process. Syst. 33, 9912\u20139924 (2020)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"1458_CR4","doi-asserted-by":"crossref","unstructured":"Caron, M., Touvron, H., Misra, I. et\u00a0al.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"1458_CR5","doi-asserted-by":"crossref","unstructured":"Castner, N., Kuebler, T.C., Scheiter, K,. et\u00a0al.: Deep semantic gaze embedding and scanpath comparison for expertise classification during opt viewing. In: ACM Symposium on Eye Tracking Research and Applications, pp. 1\u201310 (2020)","DOI":"10.1145\/3379155.3391320"},{"key":"1458_CR6","unstructured":"Chen, T., Kornblith, S., Norouzi, M., et\u00a0al.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, PMLR, pp. 1597\u20131607 (2020)"},{"key":"1458_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., He, K.: Exploring simple Siamese representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15750\u201315758 (2021)","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"1458_CR8","doi-asserted-by":"crossref","unstructured":"Chen, X., Xie, S., He, K.: An empirical study of training self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9640\u20139649 (2021)","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"1458_CR9","doi-asserted-by":"crossref","unstructured":"Chen, Z., Shi, B.E.: Appearance-based gaze estimation using dilated-convolutions. In: Asian Conference on Computer Vision, Springer, pp. 309\u2013324 (2018)","DOI":"10.1007\/978-3-030-20876-9_20"},{"key":"1458_CR10","doi-asserted-by":"crossref","unstructured":"Cheng, Y., Lu, F.: Gaze estimation using transformer. In: 2022 26th International Conference on Pattern Recognition (ICPR), IEEE, pp. 3341\u20133347 (2022)","DOI":"10.1109\/ICPR56361.2022.9956687"},{"key":"1458_CR11","doi-asserted-by":"crossref","unstructured":"Cheng, Y., Lu, F., Zhang, X.: Appearance-based gaze estimation via evaluation-guided asymmetric regression. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 100\u2013115 (2018)","DOI":"10.1007\/978-3-030-01264-9_7"},{"key":"1458_CR12","doi-asserted-by":"crossref","unstructured":"Cheng, Y., Huang, S., Wang, F. et\u00a0al.: A coarse-to-fine adaptive network for appearance-based gaze estimation. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 10623\u201310630 (2020a)","DOI":"10.1609\/aaai.v34i07.6636"},{"key":"1458_CR13","doi-asserted-by":"publisher","first-page":"5259","DOI":"10.1109\/TIP.2020.2982828","volume":"29","author":"Y Cheng","year":"2020","unstructured":"Cheng, Y., Zhang, X., Lu, F., et al.: Gaze estimation by exploring two-eye asymmetry. IEEE Trans. Image Process. 29, 5259\u20135272 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"1458_CR14","unstructured":"Cheng, Y., Wang, H., Bao, Y. et\u00a0al.: Appearance-based gaze estimation with deep learning: a review and benchmark. arXiv preprint arXiv:2104.12668 (2021)"},{"key":"1458_CR15","doi-asserted-by":"crossref","unstructured":"Ding, X., Zhang, X., Han, J., et\u00a0al.: Diverse branch block: Building a convolution as an inception-like unit. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10886\u201310895 (2021)","DOI":"10.1109\/CVPR46437.2021.01074"},{"key":"1458_CR16","doi-asserted-by":"crossref","unstructured":"Ding, X., Zhang, X., Ma, N., et\u00a0al.: Repvgg: making vgg-style convnets great again. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13733\u201313742 (2021)","DOI":"10.1109\/CVPR46437.2021.01352"},{"key":"1458_CR17","doi-asserted-by":"crossref","unstructured":"Doersch, C., Zisserman, A.: Multi-task self-supervised visual learning. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2051\u20132060 (2017)","DOI":"10.1109\/ICCV.2017.226"},{"key":"1458_CR18","doi-asserted-by":"crossref","unstructured":"Dong, X., Bao, J., Zhang, T., et\u00a0al.: Bootstrapped masked autoencoders for vision bert pretraining. In: European Conference on Computer Vision, Springer, pp. 247\u2013264 (2022)","DOI":"10.1007\/978-3-031-20056-4_15"},{"key":"1458_CR19","unstructured":"Du, L., Lan, G.: Freegaze: resource-efficient gaze estimation via frequency domain contrastive learning. arXiv preprint arXiv:2209.06692 (2022)"},{"key":"1458_CR20","unstructured":"Du, L., Zhang, X., Lan, G.: Unsupervised gaze-aware contrastive learning with subject-specific condition. arXiv preprint arXiv:2309.04506 (2023)"},{"key":"1458_CR21","unstructured":"Farkhondeh, A., Palmero, C., Scardapane, S., et\u00a0al.: Towards self-supervised gaze estimation. arXiv preprint arXiv:2203.10974 (2022)"},{"key":"1458_CR22","doi-asserted-by":"crossref","unstructured":"Fischer, T., Chang, H.J., Demiris, Y.: Rt-gene: real-time eye gaze estimation in natural environments. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 334\u2013352 (2018)","DOI":"10.1007\/978-3-030-01249-6_21"},{"key":"1458_CR23","unstructured":"Mora, K.A.F., Monay, F., Odobez, J.M.: Eyediap: a database for the development and evaluation of gaze estimation algorithms from rgb and rgb-d cameras. In: Proceedings of the Symposium on Eye Tracking Research and Applications, pp. 255\u2013258 (2014)"},{"key":"1458_CR24","doi-asserted-by":"crossref","unstructured":"Gidaris, S., Bursuc, A., Puy, G., et\u00a0al.: Obow: Online bag-of-visual-words generation for self-supervised learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6830\u20136840 (2021)","DOI":"10.1109\/CVPR46437.2021.00676"},{"key":"1458_CR25","first-page":"21271","volume":"33","author":"JB Grill","year":"2020","unstructured":"Grill, J.B., Strub, F., Altch\u00e9, F., et al.: Bootstrap your own latent-a new approach to self-supervised learning. Adv. Neural. Inf. Process. Syst. 33, 21271\u201321284 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1458_CR26","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., et\u00a0al.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1458_CR27","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., et\u00a0al.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"1458_CR28","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., et\u00a0al.: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"1458_CR29","doi-asserted-by":"crossref","unstructured":"Hu, M., Feng, J., Hua, J. et\u00a0al.: Online convolutional re-parameterization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 568\u2013577 (2022)","DOI":"10.1109\/CVPR52688.2022.00065"},{"key":"1458_CR30","doi-asserted-by":"crossref","unstructured":"Kellnhofer, P., Recasens, A., Stent, S. et\u00a0al.: Gaze360: Physically unconstrained gaze estimation in the wild. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6912\u20136921 (2019)","DOI":"10.1109\/ICCV.2019.00701"},{"issue":"2","key":"1458_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3361330","volume":"39","author":"R Konrad","year":"2020","unstructured":"Konrad, R., Angelopoulos, A., Wetzstein, G.: Gaze-contingent ocular parallax rendering for virtual reality. ACM Trans. Graph. (TOG) 39(2), 1\u201312 (2020)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"1458_CR32","doi-asserted-by":"crossref","unstructured":"Krafka, K., Khosla, A., Kellnhofer, P., et\u00a0al.: Eye tracking for everyone. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2176\u20132184 (2016)","DOI":"10.1109\/CVPR.2016.239"},{"key":"1458_CR33","doi-asserted-by":"crossref","unstructured":"Kyt\u00f6, M., Ens, B., Piumsomboon, T., et\u00a0al.: Pinpointing: Precise head-and eye-based target selection for augmented reality. In: Proceedings of the 2018 CHI Conference on Human Factors in Computing Systems, pp. 1\u201314 (2018)","DOI":"10.1145\/3173574.3173655"},{"key":"1458_CR34","doi-asserted-by":"crossref","unstructured":"Liu, J., Huang, X., Zheng, J., et\u00a0al.: Mixmae: mixed and masked autoencoder for efficient pretraining of hierarchical vision transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6252\u20136261 (2023)","DOI":"10.1109\/CVPR52729.2023.00605"},{"key":"1458_CR35","doi-asserted-by":"crossref","unstructured":"Ma, N., Zhang, X., Zheng, H.T., et\u00a0al.: Shufflenet v2: practical guidelines for efficient cnn architecture design. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 116\u2013131 (2018)","DOI":"10.1007\/978-3-030-01264-9_8"},{"issue":"86","key":"1458_CR36","first-page":"2579","volume":"9","author":"L van der Maaten","year":"2008","unstructured":"van der Maaten, L., Hinton, G.: Visualizing data using t-sne. J. Mach. Learn. Res. 9(86), 2579\u20132605 (2008)","journal-title":"J. Mach. Learn. Res."},{"issue":"2","key":"1458_CR37","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1109\/TIV.2018.2804160","volume":"3","author":"S Martin","year":"2018","unstructured":"Martin, S., Vora, S., Yuen, K., et al.: Dynamics of driver\u2019s gaze: explorations in behavior modeling and maneuver prediction. IEEE Trans. Intell. Veh. 3(2), 141\u2013150 (2018)","journal-title":"IEEE Trans. Intell. Veh."},{"key":"1458_CR38","doi-asserted-by":"crossref","unstructured":"Park, S., Mello, S.D., Molchanov, P., et\u00a0al.: Few-shot adaptive gaze estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9368\u20139377 (2019)","DOI":"10.1109\/ICCV.2019.00946"},{"key":"1458_CR39","doi-asserted-by":"publisher","first-page":"103369","DOI":"10.1016\/j.jvcir.2021.103369","volume":"81","author":"D Ren","year":"2021","unstructured":"Ren, D., Chen, J., Zhong, J., et al.: Gaze estimation via bilinear pooling-based attention networks. J. Vis. Commun. Image Represent. 81, 103369 (2021)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"1458_CR40","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Pfister, T., Tuzel, O., et\u00a0al.: Learning from simulated and unsupervised images through adversarial training. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2107\u20132116 (2017)","DOI":"10.1109\/CVPR.2017.241"},{"key":"1458_CR41","doi-asserted-by":"crossref","unstructured":"Smith, B.A., Yin, Q., Feiner, S.K., et\u00a0al.: Gaze locking: passive eye contact detection for human-object interaction. In: Proceedings of the 26th Annual ACM Symposium on User Interface Software and Technology, pp. 271\u2013280 (2013)","DOI":"10.1145\/2501988.2501994"},{"key":"1458_CR42","doi-asserted-by":"crossref","unstructured":"Stellmach, S., Stober, S., N\u00fcrnberger, A., et\u00a0al.: Designing gaze-supported multimodal interactions for the exploration of large image collections. In: Proceedings of the 1st Conference on Novel Gaze-Controlled Applications, pp. 1\u20138 (2011)","DOI":"10.1145\/1983302.1983303"},{"key":"1458_CR43","doi-asserted-by":"crossref","unstructured":"Sun, Y., Zeng, J., Shan, S., et\u00a0al.: Cross-encoder for unsupervised gaze representation learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3702\u20133711 (2021)","DOI":"10.1109\/ICCV48922.2021.00368"},{"key":"1458_CR44","unstructured":"Vaswani, A., Shazeer, N., Parmar, N. et al.: Attention is all you need. In: Advances in neural information processing systems, pp. 5998\u20136008 (2017)"},{"key":"1458_CR45","doi-asserted-by":"publisher","first-page":"106994","DOI":"10.1016\/j.engappai.2023.106994","volume":"126","author":"C Wu","year":"2023","unstructured":"Wu, C., Hu, H., Lin, K., et al.: Attention-guided and fine-grained feature extraction from face images for gaze estimation. Eng. Appl. Artif. Intell. 126, 106994 (2023)","journal-title":"Eng. Appl. Artif. Intell."},{"issue":"8","key":"1458_CR46","doi-asserted-by":"publisher","first-page":"5510","DOI":"10.1109\/TCSVT.2022.3152800","volume":"32","author":"Y Wu","year":"2022","unstructured":"Wu, Y., Li, G., Liu, Z., et al.: Gaze estimation via modulation-based adaptive network with auxiliary self-learning. IEEE Trans. Circuits Syst. Video Technol. 32(8), 5510\u20135520 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1458_CR47","doi-asserted-by":"crossref","unstructured":"Wu, Z., Xiong, Y., Yu, S.X., et\u00a0al.: Unsupervised feature learning via non-parametric instance discrimination. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3733\u20133742 (2018)","DOI":"10.1109\/CVPR.2018.00393"},{"key":"1458_CR48","doi-asserted-by":"crossref","unstructured":"Xie, Z., Zhang, Z., Cao, Y., et\u00a0al.: Simmim: a simple framework for masked image modeling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9653\u20139663 (2022)","DOI":"10.1109\/CVPR52688.2022.00943"},{"key":"1458_CR49","unstructured":"Zbontar, J., Jing, L., Misra, I., et\u00a0al.: Barlow twins: self-supervised learning via redundancy reduction. In: International Conference on Machine Learning, PMLR, pp. 12310\u201312320 (2021)"},{"key":"1458_CR50","doi-asserted-by":"crossref","unstructured":"Zhang, X., Sugano, Y., Fritz, M., et\u00a0al.: It\u2019s written all over your face: full-face appearance-based gaze estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 51\u201360 (2017)","DOI":"10.1109\/CVPRW.2017.284"},{"issue":"1","key":"1458_CR51","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1109\/TPAMI.2017.2778103","volume":"41","author":"X Zhang","year":"2017","unstructured":"Zhang, X., Sugano, Y., Fritz, M., et al.: Mpiigaze: real-world dataset and deep appearance-based gaze estimation. IEEE Trans. Pattern Anal. Mach. Intell. 41(1), 162\u2013175 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1458_CR52","doi-asserted-by":"crossref","unstructured":"Zhang, X., Park, S., Beeler, T., et\u00a0al.: Eth-xgaze: a large scale dataset for gaze estimation under extreme head pose and gaze variation. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part V 16, Springer, pp. 365\u2013381 (2020)","DOI":"10.1007\/978-3-030-58558-7_22"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01458-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01458-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01458-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T09:09:56Z","timestamp":1734340196000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01458-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,2]]},"references-count":52,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["1458"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01458-x","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,2]]},"assertion":[{"value":"29 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"330"}}