{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T14:30:24Z","timestamp":1754145024656,"version":"3.41.2"},"publisher-location":"Cham","reference-count":56,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031986871"},{"type":"electronic","value":"9783031986888"}],"license":[{"start":{"date-parts":[[2025,7,17]],"date-time":"2025-07-17T00:00:00Z","timestamp":1752710400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,17]],"date-time":"2025-07-17T00:00:00Z","timestamp":1752710400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-031-98688-8_18","type":"book-chapter","created":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T09:09:14Z","timestamp":1752656954000},"page":"257-274","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DiNO-Diffusion: Scaling Medical Diffusion Models via\u00a0Self-Supervised Pre-Training"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2281-107X","authenticated-orcid":false,"given":"Guillermo","family":"Jimenez-Perez","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8848-0214","authenticated-orcid":false,"given":"Pedro","family":"Os\u00f3rio","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3154-177X","authenticated-orcid":false,"given":"Josef","family":"Cersovsky","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1956-1409","authenticated-orcid":false,"given":"Javier","family":"Montalt-Tordera","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2667-6490","authenticated-orcid":false,"given":"Jens","family":"Hooge","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7046-0606","authenticated-orcid":false,"given":"Steffen","family":"Vogler","sequence":"additional","affiliation":[]},{"given":"Sadegh","family":"Mohammadi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,17]]},"reference":[{"key":"18_CR1","doi-asserted-by":"publisher","unstructured":"Kazerouni, A., et al.: Diffusion models in medical imaging: a comprehensive survey. Med. Image Anal. 88, 102846 (2023). issn: 1361\u20138415. https:\/\/doi.org\/10.1016\/j.media.2023.102846","DOI":"10.1016\/j.media.2023.102846"},{"key":"18_CR2","doi-asserted-by":"publisher","unstructured":"Osorio, P., et al.: Latent diffusion models with image-derived annotations for enhanced AI-assisted cancer diagnosis in histopathology. Diagnostics 14(13) (2024). issn: 2075-4418. https:\/\/doi.org\/10.3390\/diagnostics14131442","DOI":"10.3390\/diagnostics14131442"},{"key":"18_CR3","unstructured":"Chambon, P., et al.: Roentgen: vision-language foundation model for chest x-ray generation. arXiv preprint arXiv:2211.12737 (2022)"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Ye, J., et al.: Synthetic augmentation with large-scale unconditional pre-training. In: Greenspan, H., et al. (eds.) Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2023, pp. 754\u2013764. Springer, Cham (2023). isbn: 978-3-031-43895-0","DOI":"10.1007\/978-3-031-43895-0_71"},{"key":"18_CR5","unstructured":"Aversa, M., et al.: DiffInfinite: large mask-image synthesis via parallel random patch diffusion in histopathology. In: Oh, A., et al. (eds.) Advances in Neural Information Processing Systems, vol. 36, pp. 78126\u201378141. Curran Associates, Inc. (2023)"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Pinaya, W.H.L., et al.: Brain imaging generation with latent diffusion models. In: Mukhopadhyay, A., et al. (eds.) Deep Generative Models, pp. 117\u2013126. Springer, Cham (2022). isbn: 978-3-031-18576-2","DOI":"10.1007\/978-3-031-18576-2_12"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Tian, J., et al.: Diffuse, attend and segment: unsupervised zero-shot segmentation using stable diffusion model. arXiv preprint arXiv:2308.12469 (2023)","DOI":"10.1109\/CVPR52733.2024.00341"},{"key":"18_CR8","unstructured":"Zhang, J., et al.: A tale of two features: stable diffusion complements DINO for zero-shot semantic correspondence. In: Oh, A., et al. (eds.) Advances in Neural Information Processing Systems, vol. 36, pp. 45533\u201345547. Curran Associates, Inc. (2023)"},{"key":"18_CR9","doi-asserted-by":"publisher","unstructured":"Beddiar, D.R., Oussalah, M., Sepp\u00e4nen, T.: Automatic captioning for medical imaging (MIC): a rapid review of literature. Artif. Intell. Rev. 56(5), 4019\u20134076 (2023). issn: 1573\u20137462. https:\/\/doi.org\/10.1007\/s10462-022-10270-w","DOI":"10.1007\/s10462-022-10270-w"},{"key":"18_CR10","unstructured":"Betker, J., et al.: Improving image generation with better captions. Comput. Sci. 2(3), 8 (2023). https:\/\/cdn.openai.com\/papers\/dall-e-3.pdf"},{"key":"18_CR11","unstructured":"Li, J., et al.: BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: Krause, A., et al. (eds.) Proceedings of the 40th International Conference on Machine Learning, vol. 202, pp. 19730\u201319742. Proceedings of Machine Learning Research. PMLR (2023)"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., , M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"18_CR13","unstructured":"Esser, P., et al.: Scaling rectified flow transformers for high-resolution image synthesis. arXiv preprint arXiv:2403.03206 (2024)"},{"key":"18_CR14","unstructured":"Liu, Y., et al.: Sora: a review on background, technology, limitations, and opportunities of large vision models. arXiv preprint arXiv:2402.17177 (2024)"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"18_CR16","unstructured":"Oquab, M., et al.: Dinov2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"18_CR17","unstructured":"P\u00e9rez-Garca, F., et al.: RAD-DINO: exploring scalable medical image encoders beyond text supervision. arXiv preprint arXiv:2401.10815 (2024)"},{"key":"18_CR18","unstructured":"Dippel, J., et al.: RudolfV: a foundation model by pathologists for pathologists. arXiv preprint arXiv:2401.04079 (2024)"},{"key":"18_CR19","unstructured":"Moutakanni, T., et al.: Advancing human-centric AI for robust X-ray analysis through holistic self-supervised learning. arXiv preprint arXiv:2405.01469 (2024)"},{"key":"18_CR20","doi-asserted-by":"publisher","unstructured":"de la Iglesia Vay\u00e1, M., et al.: BIMCV COVID-19+: a large annotated dataset of RX and CT images from COVID-19 patients with Extension Part I (2023). https:\/\/doi.org\/10.21227\/f3q6-0986","DOI":"10.21227\/f3q6-0986"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Irvin, J., et al.: CheXpert: a large chest radiograph dataset with uncertainty labels and expert comparison. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, no. 01, pp. 590\u2013597 (2019)","DOI":"10.1609\/aaai.v33i01.3301590"},{"key":"18_CR22","doi-asserted-by":"publisher","unstructured":"Goldberger, A.L., et al.: PhysioBank, PhysioToolkit, and PhysioNet. Circulation 101(23), e215\u2013e220 (2000). https:\/\/doi.org\/10.1161\/01.CIR.101.23.e215. https:\/\/www.ahajournals.org\/doi\/pdf\/10.1161\/01. CIR.101.23.e215","DOI":"10.1161\/01.CIR.101.23.e215"},{"key":"18_CR23","doi-asserted-by":"publisher","unstructured":"Johnson, A..E.W., et al.: MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports. Sci. Data 6(1), 317 (2019). issn: 2052-4463. https:\/\/doi.org\/10.1038\/s41597-019-0322-0","DOI":"10.1038\/s41597-019-0322-0"},{"key":"18_CR24","doi-asserted-by":"publisher","unstructured":"Demner-Fushman, D., et al.: Preparing a collection of radiology examinations for distribution and retrieval. J. Am. Med. Inf. Assoc. 23(2), 304\u2013310 (2015). issn: 1067-5027. https:\/\/doi.org\/10.1093\/jamia\/ocv080. https:\/\/academic.oup.com\/jamia\/article-pdf\/23\/2\/304\/34147537\/ocv080.pdf","DOI":"10.1093\/jamia\/ocv080"},{"key":"18_CR25","doi-asserted-by":"publisher","unstructured":"Tabik, S., et al.: COVIDGR dataset and COVID-SDNet methodology for predicting COVID-19 based on chest X-ray images. IEEE J. Biomed. Health Inf. 24(12), 3595\u20133605 (2020). https:\/\/doi.org\/10.1109\/JBHI.2020.3037127","DOI":"10.1109\/JBHI.2020.3037127"},{"key":"18_CR26","doi-asserted-by":"publisher","unstructured":"Jaeger, S., et al.: Automatic tuberculosis screening using chest radiographs. IEEE Trans. Med. Imaging 33(2), 233\u2013245 (2014). https:\/\/doi.org\/10.1109\/TMI.2013.2284099","DOI":"10.1109\/TMI.2013.2284099"},{"key":"18_CR27","doi-asserted-by":"publisher","unstructured":"Candemir, S., et al.: Lung segmentation in chest radiographs using anatomical atlases with nonrigid registration. IEEE Trans. Med. Imaging 33(2), 577\u2013590 (2014). https:\/\/doi.org\/10.1109\/TMI.2013.2290491","DOI":"10.1109\/TMI.2013.2290491"},{"key":"18_CR28","doi-asserted-by":"publisher","unstructured":"Bustos, A., et al.: PadChest: a large chest x-ray image dataset with multi-label annotated reports. Med. Image Anal. 66, 101797 (2020). issn: 1361-8415. https:\/\/doi.org\/10.1016\/j.media. 2020.101797","DOI":"10.1016\/j.media"},{"key":"18_CR29","doi-asserted-by":"publisher","unstructured":"Cohen, J.P., et al.: Radiographic assessment of lung opacity score dataset. Version v1 (2021). https:\/\/doi.org\/10.5281\/zenodo.4634000","DOI":"10.5281\/zenodo.4634000"},{"key":"18_CR30","doi-asserted-by":"publisher","unstructured":"Reis, E.P., et al.: BRAX, Brazilian labeled chest x-ray dataset. Sci. Data 9(1), 487 (2022). issn: 2052-4463. https:\/\/doi.org\/10.1038\/s41597-022-01608-8","DOI":"10.1038\/s41597-022-01608-8"},{"key":"18_CR31","doi-asserted-by":"publisher","unstructured":"Shiraishi, J., et al.: Development of a digital image database for chest radiographs with and without a lung nodule. Am. J. Roentgenol. 174(1), 71\u201374 (2000). PMID: 10628457. https:\/\/doi.org\/10.2214\/ajr.174.1.1740071","DOI":"10.2214\/ajr.174.1.1740071"},{"key":"18_CR32","doi-asserted-by":"publisher","unstructured":"Kermany, D.S., et al.: Identifying medical diagnoses and treatable diseases by image-based deep learning. Cell 172(5), 1122\u20131131.e9 (2018). issn: 0092-8674. https:\/\/doi.org\/10.1016\/j.cell.2018.02.010","DOI":"10.1016\/j.cell.2018.02.010"},{"key":"18_CR33","doi-asserted-by":"crossref","unstructured":"Cohen, J.P., et al.: COVID-19 image data collection: prospective predictions are the future. arxiv arXiv:2006.11988 (2020)","DOI":"10.59275\/j.melba.2020-48g7"},{"key":"18_CR34","doi-asserted-by":"publisher","unstructured":"Chowdhury, M.E.H., et al.: Can AI help in screening viral and COVID-19 pneumonia?. IEEE Access 8, 132665\u2013132676 (2020). https:\/\/doi.org\/10.1109\/ACCESS.2020.3010287","DOI":"10.1109\/ACCESS.2020.3010287"},{"key":"18_CR35","doi-asserted-by":"publisher","unstructured":"Rahman, T., et al.: Exploring the effect of image enhancement techniques on COVID-19 detection using chest X-ray images. Comput. Biol. Med. 132, 104319 (2021). issn: 0010-4825. https:\/\/doi.org\/10.1016\/j.compbiomed.2021.104319","DOI":"10.1016\/j.compbiomed.2021.104319"},{"key":"18_CR36","unstructured":"JF Healthcare. Object-CXR - Automatic detection of foreign objects on chest X-rays (2020)"},{"key":"18_CR37","doi-asserted-by":"publisher","unstructured":"Nguyen, H.Q., et al.: VinDr-CXR: an open dataset of chest X-rays with radiologist\u2019s annotations Sci. Data 9(1), 429 (2022). 2052-4463. https:\/\/doi.org\/10.1038\/s41597-022-01498-w","DOI":"10.1038\/s41597-022-01498-w"},{"key":"18_CR38","doi-asserted-by":"publisher","unstructured":"Pham, H.H., et al.: PediCXR: an open, large-scale chest radiograph dataset for interpretation of common thoracic diseases in children. Sci. Data 10(1), 240 (2023). issn: 2052-4463. https:\/\/doi.org\/10.1038\/s41597-023-02102-5","DOI":"10.1038\/s41597-023-02102-5"},{"key":"18_CR39","unstructured":"Zawacki, A., et al.: SIIM-ACR Pneumothorax Segmentation (2019)"},{"key":"18_CR40","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Rethinking computer-aided tuberculosis diagnosis. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2646\u20132655 (2020)","DOI":"10.1109\/CVPR42600.2020.00272"},{"key":"18_CR41","doi-asserted-by":"publisher","unstructured":"Rahman, T., et al.: TB-CXRNet: tuberculosis and drug-resistant tuberculosis detection technique using chest X-ray images. Cogn. Comput. (2024). issn: 1866-9964. https:\/\/doi.org\/10.1007\/s12559-024-10259-3","DOI":"10.1007\/s12559-024-10259-3"},{"key":"18_CR42","doi-asserted-by":"publisher","unstructured":"Fedorov, A., et al.: NCI imaging data commons. Cancer Res. 81(16), 4188\u20134193 (2021). issn: 0008-5472. https:\/\/doi.org\/10.1158\/0008-5472.CAN-21-0950. https:\/\/aacrjournals.org\/cancerres\/article-pdf\/81\/16\/4188\/3294013\/4188.pdf","DOI":"10.1158\/0008-5472.CAN-21-0950"},{"key":"18_CR43","doi-asserted-by":"crossref","unstructured":"Rombach, R., et al.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"18_CR44","unstructured":"Chambon, P.J.M., et al.: Adapting pretrained vision-language foundational models to medical imaging domains. In: NeurIPS 2022 Foundation Models for Decision Making Workshop, pp. 1\u201312 (2022)"},{"key":"18_CR45","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations, pp. 1\u201321 (2021)"},{"key":"18_CR46","unstructured":"Darcet, T., et al.: Vision transformers need registers. In: The Twelfth International Conference on Learning Representations, pp. 1\u201321 (2024)"},{"key":"18_CR47","unstructured":"Zhang, H., et al.: mixup: Beyond empirical risk minimization. In: International Conference on Learning Representations, pp. 1\u201313 (2018)"},{"key":"18_CR48","unstructured":"Heusel, M., et al.: GANs trained by a two time-scale update rule converge to a local nash equilibrium. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems, vol. 30, pp. 1\u201312. Curran Associates, Inc. (2017)"},{"key":"18_CR49","unstructured":"Cohen, J.P., et al.: TorchXRayVision: a library of chest X-ray datasets and models. In: Konukoglu, E., et al. (eds.) Proceedings of The 5th International Conference on Medical Imaging with Deep Learning, vol. 172, pp. 231\u2013249. Proceedings of Machine Learning Research. PMLR (2022)"},{"key":"18_CR50","unstructured":"von Platen, P., et al.: Diffusers: state-of-the-art diffusion models (2022). https:\/\/github.com\/huggingface\/diffusers\/blob\/main\/examples\/text_to_image\/train_text_to_image.py"},{"key":"18_CR51","unstructured":"Lefaudeux, B., et al. xFormers: A modular and hackable Transformer modelling library (2022). https:\/\/github.com\/facebookresearch\/xformers"},{"key":"18_CR52","unstructured":"WebDataset Contributors. WebDataset: A PyTorch I\/O Dataset for Large- Scale Data (2021). https:\/\/github.com\/webdataset\/webdataset. Accessed 01 Jan 2024"},{"key":"18_CR53","doi-asserted-by":"publisher","unstructured":"Xu, X., et al.: Lung segmentation in chest X-ray image using multiinteraction feature fusion network. IET Image Process. 17(14), 4129\u20134141 (2023). https:\/\/doi.org\/10.1049\/ipr2.12923. https:\/\/ietresearch.onlinelibrary.wiley.com\/doi\/pdf\/10.1049\/ipr2.12923","DOI":"10.1049\/ipr2.12923"},{"key":"18_CR54","doi-asserted-by":"publisher","unstructured":"Liu, W., Luo, J., Yang, Y., et al.: Automatic lung segmentation in chest X-ray images using improved U-Net. Sci. Rep. 12, 8649 (2022). https:\/\/doi.org\/10.1038\/s41598-022-12743-y","DOI":"10.1038\/s41598-022-12743-y"},{"key":"18_CR55","doi-asserted-by":"publisher","unstructured":"Akhtar, M., et al.: Croissant: a metadata format for ML-ready datasets. In: Proceedings of the Eighth Workshop on Data Management for End-to-End Machine Learning. DEEM \u201924, pp. 1\u20136. Association for Computing Machinery, Santiago (2024). isbn: 9798400706110. https:\/\/doi.org\/10.1145\/3650203.3663326","DOI":"10.1145\/3650203.3663326"},{"key":"18_CR56","doi-asserted-by":"crossref","unstructured":"Xu, X., et al.: ViT-DAE: transformer-driven diffusion autoencoder for histopathology image analysis. In: Mukhopadhyay, A., et al. (eds.) Deep Generative Models, pp. 66\u201376. Springer, Cham (2024). isbn: 978-3-031-53767-7","DOI":"10.1007\/978-3-031-53767-7_7"}],"container-title":["Lecture Notes in Computer Science","Medical Image Understanding and Analysis"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-98688-8_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T09:09:30Z","timestamp":1752656970000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-98688-8_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,17]]},"ISBN":["9783031986871","9783031986888"],"references-count":56,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-98688-8_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,7,17]]},"assertion":[{"value":"17 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MIUA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Annual Conference on Medical Image Understanding and Analysis","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Leeds","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miua2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.leeds.ac.uk\/miua\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}