{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T22:09:50Z","timestamp":1758406190638,"version":"3.44.0"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032051134","type":"print"},{"value":"9783032051141","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T00:00:00Z","timestamp":1758412800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T00:00:00Z","timestamp":1758412800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-05114-1_13","type":"book-chapter","created":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T14:10:36Z","timestamp":1758377436000},"page":"128-138","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Contrastive Masked Video Modeling for\u00a0Coronary Angiography Diagnosis"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-0210-8513","authenticated-orcid":false,"given":"Zhiming","family":"Shao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yingqian","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zechen","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yong","family":"Ge","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chen","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guodong","family":"Ding","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liwei","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yundai","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0498-0432","authenticated-orcid":false,"given":"Jie","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6732-4232","authenticated-orcid":false,"given":"Hui","family":"Hui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,9,21]]},"reference":[{"key":"13_CR1","unstructured":"Bao, H., Dong, L., Piao, S., Wei, F.: Beit: bert pre-training of image transformers. arXiv preprint arXiv:2106.08254 (2021)"},{"key":"13_CR2","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? In: ICML, vol.\u00a02, p.\u00a04 (2021)"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"13_CR4","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607. PMLR (2020)"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., He, K.: Exploring simple siamese representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15750\u201315758 (2021)","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"13_CR6","doi-asserted-by":"crossref","unstructured":"Chen, X., Xie, S., He, K.: An empirical study of training self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9640\u20139649 (2021)","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"13_CR7","unstructured":"Devlin, J.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"13_CR8","unstructured":"Dosovitskiy, A.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C.: X3d: Expanding architectures for efficient video recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"13_CR11","first-page":"35946","volume":"35","author":"C Feichtenhofer","year":"2022","unstructured":"Feichtenhofer, C., Li, Y., He, K., et al.: Masked autoencoders as spatiotemporal learners. Adv. Neural. Inf. Process. Syst. 35, 35946\u201335958 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Ghadiyaram, D., Tran, D., Mahajan, D.: Large-scale weakly-supervised pre-training for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 12046\u201312055 (2019)","DOI":"10.1109\/CVPR.2019.01232"},{"key":"13_CR13","first-page":"21271","volume":"33","author":"JB Grill","year":"2020","unstructured":"Grill, J.B., et al.: Bootstrap your own latent-a new approach to self-supervised learning. Adv. Neural. Inf. Process. Syst. 33, 21271\u201321284 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Huang, Z., et al.: Contrastive masked autoencoders are stronger vision learners. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","DOI":"10.1109\/TPAMI.2023.3336525"},{"key":"13_CR17","unstructured":"Lee, Y., Willette, J., Kim, J., Lee, J., Hwang, S.J.: Exploring the role of mean teachers in self-supervised masked auto-encoders. arXiv preprint arXiv:2210.02077 (2022)"},{"key":"13_CR18","unstructured":"Li, K., et al.: Uniformerv2: spatiotemporal learning by arming image vits with video uniformer. arXiv preprint arXiv:2211.09552 (2022)"},{"key":"13_CR19","unstructured":"Li, K., et al.: Uniformer: unified transformer for efficient spatial-temporal representation learning. In: International Conference on Learning Representations (2022)"},{"key":"13_CR20","doi-asserted-by":"crossref","unstructured":"Li, Y., Wu, C.Y., Fan, H., Mangalam, K., Xiong, B., Malik, J., Feichtenhofer, C.: Mvitv2: improved multiscale vision transformers for classification and detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4804\u20134814 (2022)","DOI":"10.1109\/CVPR52688.2022.00476"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Video swin transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3202\u20133211 (2022)","DOI":"10.1109\/CVPR52688.2022.00320"},{"issue":"1","key":"13_CR22","first-page":"7278343","volume":"2023","author":"W Lu","year":"2023","unstructured":"Lu, W., Zhang, X., Yan, G., Ma, G.: The differences of quantitative flow ratio in coronary artery stenosis with or without atrial fibrillation. J. Interv. Cardiol. 2023(1), 7278343 (2023)","journal-title":"J. Interv. Cardiol."},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Mehta, R.H., et al.: Sustained ventricular tachycardia or fibrillation in the cardiac catheterization laboratory among patients receiving primary percutaneous coronary intervention: incidence, predictors, and outcomes. J. Am. College of Cardiol. 43(10), 1765\u20131772 (2004)","DOI":"10.1016\/j.jacc.2003.09.072"},{"key":"13_CR24","unstructured":"Oord, A.v.d., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)"},{"key":"13_CR25","first-page":"10078","volume":"35","author":"Z Tong","year":"2022","unstructured":"Tong, Z., Song, Y., Wang, J., Wang, L.: Videomae: masked autoencoders are data-efficient learners for self-supervised video pre-training. Adv. Neural. Inf. Process. Syst. 35, 10078\u201310093 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Wang, L., et al.: Videomae v2: scaling video masked autoencoders with dual masking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14549\u201314560 (2023)","DOI":"10.1109\/CVPR52729.2023.01398"},{"key":"13_CR27","doi-asserted-by":"crossref","unstructured":"Wang, R., et al.: Bevt: bert pretraining of video transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14733\u201314743 (2022)","DOI":"10.1109\/CVPR52688.2022.01432"},{"key":"13_CR28","doi-asserted-by":"crossref","unstructured":"Wang, R., et al.: Masked video distillation: rethinking masked feature modeling for self-supervised video representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6312\u20136322 (2023)","DOI":"10.1109\/CVPR52729.2023.00611"},{"key":"13_CR29","doi-asserted-by":"crossref","unstructured":"Wei, C., Fan, H., Xie, S., Wu, C.Y., Yuille, A., Feichtenhofer, C.: Masked feature prediction for self-supervised visual pre-training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14668\u201314678 (2022)","DOI":"10.1109\/CVPR52688.2022.01426"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Xie, Z., et al.: Simmim: a simple framework for masked image modeling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9653\u20139663 (2022)","DOI":"10.1109\/CVPR52688.2022.00943"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Yang, C., Xu, Y., Shi, J., Dai, B., Zhou, B.: Temporal pyramid network for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00067"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Yao, Y., Desai, N., Palaniswami, M.: Masked contrastive representation learning for self-supervised visual pre-training. In: 2024 IEEE 11th International Conference on Data Science and Advanced Analytics (DSAA), pp. 1\u201310. IEEE (2024)","DOI":"10.1109\/DSAA61799.2024.10722789"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-05114-1_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T14:10:46Z","timestamp":1758377446000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-05114-1_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,21]]},"ISBN":["9783032051134","9783032051141"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-05114-1_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,21]]},"assertion":[{"value":"21 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}