{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T09:27:02Z","timestamp":1758274022232,"version":"3.32.0"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100010428","name":"Innovation and Technology Fund","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010428","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1109\/bibm62325.2024.10822757","type":"proceedings-article","created":{"date-parts":[[2025,1,10]],"date-time":"2025-01-10T20:12:45Z","timestamp":1736539965000},"page":"1573-1578","source":"Crossref","is-referenced-by-count":1,"title":["Bootstrapping Radiography Pre-training via Siamese Masked Vision-Language Modeling with Complementary Self-distillation"],"prefix":"10.1109","author":[{"given":"Wenqiang","family":"Li","sequence":"first","affiliation":[{"name":"Hong Kong University of Science and Technology,Department of Computer Science and Engineering,Hong Kong,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Luyang","family":"Luo","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology,Department of Computer Science and Engineering,Hong Kong,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Chen","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology,HKUST Shenzhen-Hong Kong Collaborative Innovation Research Institute,Department of Computer Science and Engineering Department of Chemical and Biological Engineering,Hong Kong,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"article-title":"Improved baselines with momentum contrastive learning","year":"2020","author":"Chen","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"article-title":"Advancing radiograph representation learning with masked record modeling","year":"2023","author":"Zhou","key":"ref5"},{"key":"ref6","first-page":"33 536","article-title":"Multi-granularity cross-modal alignment for generalized medical visual representation learning","volume":"35","author":"Wang","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43904-9_48"},{"article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","year":"2018","author":"Devlin","key":"ref8"},{"article-title":"Vision transformers need registers","year":"2023","author":"Darcet","key":"ref9"},{"article-title":"ibot: Image bert pre-training with online tokenizer","year":"2021","author":"Zhou","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20056-4_7"},{"key":"ref12","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International conference on machine learning","author":"Radford"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00391"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00358"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02016"},{"article-title":"Dinov2: Learning robust visual features without supervision","year":"2023","author":"Oquab","key":"ref16"},{"article-title":"Towards general purpose vision foundation models for medical image analysis: An experimental study of dinov2 on radiology benchmarks","year":"2023","author":"Baharoon","key":"ref17"},{"key":"ref18","first-page":"2","article-title":"Contrastive learning of medical visual representations from paired images and text","volume-title":"Machine Learning for Healthcare Conference","author":"Zhang"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-019-0322-0"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.369"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301590"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1148\/ryai.2019180041"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-020-76550-z"},{"article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","year":"2020","author":"Dosovitskiy","key":"ref24"}],"event":{"name":"2024 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","start":{"date-parts":[[2024,12,3]]},"location":"Lisbon, Portugal","end":{"date-parts":[[2024,12,6]]}},"container-title":["2024 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10821710\/10821711\/10822757.pdf?arnumber=10822757","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,11]],"date-time":"2025-01-11T10:21:51Z","timestamp":1736590911000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10822757\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/bibm62325.2024.10822757","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]}}}