{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T15:47:49Z","timestamp":1768924069375,"version":"3.49.0"},"reference-count":70,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Ningbo Clinical Research Center for Medical Imaging","award":["2021L003 (Open Project 2022LYKFZD06)"],"award-info":[{"award-number":["2021L003 (Open Project 2022LYKFZD06)"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62171377"],"award-info":[{"award-number":["62171377"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen Science and Technology Program","award":["JCYJ20220530161616036"],"award-info":[{"award-number":["JCYJ20220530161616036"]}]},{"DOI":"10.13039\/501100001809","name":"Innovation Foundation for Doctor Dissertation of Northwestern Polytechnical University","doi-asserted-by":"publisher","award":["CX2024016"],"award-info":[{"award-number":["CX2024016"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Med. Imaging"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1109\/tmi.2024.3431916","type":"journal-article","created":{"date-parts":[[2024,7,22]],"date-time":"2024-07-22T17:59:56Z","timestamp":1721671196000},"page":"118-129","source":"Crossref","is-referenced-by-count":14,"title":["CADS: A Self-Supervised Learner via Cross-Modal Alignment and Deep Self-Distillation for CT Volume Segmentation"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2189-6865","authenticated-orcid":false,"given":"Yiwen","family":"Ye","sequence":"first","affiliation":[{"name":"National Engineering Laboratory for Integrated Aero-Space-Ground-Ocean Big Data Application Technology, School of Computer Science and Engineering, Northwestern Polytechnical University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1647-7447","authenticated-orcid":false,"given":"Jianpeng","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8564-9735","authenticated-orcid":false,"given":"Ziyang","family":"Chen","sequence":"additional","affiliation":[{"name":"National Engineering Laboratory for Integrated Aero-Space-Ground-Ocean Big Data Application Technology, School of Computer Science and Engineering, Northwestern Polytechnical University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9273-2847","authenticated-orcid":false,"given":"Yong","family":"Xia","sequence":"additional","affiliation":[{"name":"National Engineering Laboratory for Integrated Aero-Space-Ground-Ocean Big Data Application Technology, School of Computer Science and Engineering, Northwestern Polytechnical University, Xi&#x2019;an, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.3390\/su13031224"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref3","article-title":"Improved baselines with momentum contrastive learning","author":"Chen","year":"2020","journal-title":"arXiv:2003.04297"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3497510"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"ref6","article-title":"PGL: Prior-guided local self-supervised learning for 3D medical image segmentation","author":"Xie","year":"2020","journal-title":"arXiv:2011.12640"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87193-2_59"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19803-8_31"},{"key":"ref9","article-title":"BEiT: BERT pre-training of image transformers","author":"Bao","year":"2021","journal-title":"arXiv:2106.08254"},{"key":"ref10","article-title":"BEiT v2: Masked image modeling with vector-quantized visual tokenizers","author":"Peng","year":"2022","journal-title":"arXiv:2208.06366"},{"key":"ref11","article-title":"Image as a foreign language: BEiT pretraining for all vision and vision-language tasks","author":"Wang","year":"2022","journal-title":"arXiv:2208.10442"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16452-1_9"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI53787.2023.10230477"},{"key":"ref15","first-page":"1","article-title":"iBOT: Image BERT pre-training with online tokenizer","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Zhou"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16440-8_53"},{"key":"ref17","article-title":"Context autoencoder for self-supervised representation learning","author":"Chen","year":"2022","journal-title":"arXiv:2202.03026"},{"key":"ref18","article-title":"Contrastive masked autoencoders are stronger vision learners","author":"Huang","year":"2022","journal-title":"arXiv:2207.13532"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19821-2_26"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02016"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1088\/0031-9155\/45\/10\/305"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1038\/s41592-020-01008-z"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87199-4_16"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16440-8_52"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.5555\/3524938.3525087"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref28","first-page":"9912","article-title":"Unsupervised learning of visual features by contrasting cluster assignments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Caron"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01608"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20056-4_27"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00120"},{"key":"ref32","article-title":"A survey on masked autoencoder for self-supervised learning in vision and beyond","author":"Zhang","year":"2022","journal-title":"arXiv:2208.00173"},{"key":"ref33","article-title":"Uniform masking: Enabling MAE pre-training for pyramid-based vision transformers with locality","author":"Li","year":"2022","journal-title":"arXiv:2205.10063"},{"key":"ref34","first-page":"1","article-title":"Green hierarchical vision transformer for masked image modeling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Huang"},{"key":"ref35","article-title":"ConvMAE: Masked convolution meets masked autoencoders","author":"Gao","year":"2022","journal-title":"arXiv:2205.03892"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/tiv.2023.3322409"},{"key":"ref37","article-title":"MaskViT: Masked visual pre-training for video prediction","author":"Gupta","year":"2022","journal-title":"arXiv:2206.11894"},{"key":"ref38","article-title":"Masked visual pre-training for motor control","author":"Xiao","year":"2022","journal-title":"arXiv:2203.06173"},{"key":"ref39","article-title":"IBoot: Image-bootstrapped self-supervised video representation learning","author":"Saleh","year":"2022","journal-title":"arXiv:2206.08339"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI.2017.7950587"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-32251-9_46"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2020.101746"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2019.101539"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2020.101840"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00920"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02007"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-78191-0_51"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19803-8_33"},{"key":"ref50","article-title":"Multimodal learning with transformers: A survey","author":"Xu","year":"2022","journal-title":"arXiv:2206.06488"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00756"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16443-9_65"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_20"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-022-0274-8"},{"key":"ref55","first-page":"1","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","volume-title":"Proc. ICLR","author":"Dosovitskiy"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01058"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.103023"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1117\/1.JMI.5.3.036501"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00965"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2022.102680"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2020.101821"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-022-30695-9"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00125"},{"key":"ref65","first-page":"1","article-title":"Weight normalization: A simple reparameterization to accelerate training of deep neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Salimans"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI48211.2021.9433758"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2005.02.002"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.2214\/ajr.174.1.1740071"},{"key":"ref69","first-page":"36722","article-title":"AMOS: A large-scale abdominal multi-organ benchmark for versatile medical image segmentation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Ji"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1186\/s13104-021-05592-x"}],"container-title":["IEEE Transactions on Medical Imaging"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/42\/10820125\/10605840.pdf?arnumber=10605840","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,3]],"date-time":"2025-01-03T19:26:02Z","timestamp":1735932362000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10605840\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1]]},"references-count":70,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tmi.2024.3431916","relation":{},"ISSN":["0278-0062","1558-254X"],"issn-type":[{"value":"0278-0062","type":"print"},{"value":"1558-254X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1]]}}}