{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T00:10:24Z","timestamp":1758759024910,"version":"3.44.0"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032054715","type":"print"},{"value":"9783032054722","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T00:00:00Z","timestamp":1758758400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T00:00:00Z","timestamp":1758758400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-05472-2_6","type":"book-chapter","created":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T09:26:02Z","timestamp":1758705962000},"page":"57-66","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Med-Art: Diffusion Transformer for\u00a02D Medical Text-to-Image Generation"],"prefix":"10.1007","author":[{"given":"Changlu","family":"Guo","sequence":"first","affiliation":[]},{"given":"Anders Nymark","family":"Christensen","sequence":"additional","affiliation":[]},{"given":"Morten Rieger","family":"Hannemose","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,25]]},"reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"Akrout, M., et\u00a0al.: Diffusion-based data augmentation for skin disease classification: Impact across original medical datasets to fully synthetic images. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 99\u2013109. Springer (2023)","DOI":"10.1007\/978-3-031-53767-7_10"},{"key":"6_CR2","unstructured":"Alexey, D.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv: 2010.11929 (2020)"},{"key":"6_CR3","unstructured":"Baldridge, J., et\u00a0al.: Imagen 3. arXiv preprint arXiv:2408.07009 (2024)"},{"key":"6_CR4","unstructured":"Bi\u0144kowski, M., Sutherland, D.J., Arbel, M., Gretton, A.: Demystifying mmd gans. arXiv preprint arXiv:1801.01401 (2018)"},{"key":"6_CR5","unstructured":"Bluethgen, C., et al.: A vision\u2013language foundation model for the generation of realistic chest x-ray images. Nat. Biomed. Eng. 1\u201313 (2024)"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Chen, J., et\u00a0al.: Pixart-$$\\alpha $$: fast training of diffusion transformer for photorealistic text-to-image synthesis. In: International Conference on Learning Representations (ICLR) (2024)","DOI":"10.1007\/978-3-031-73411-3_5"},{"key":"6_CR7","unstructured":"Chen, T., Xu, B., Zhang, C., Guestrin, C.: Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174 (2016)"},{"key":"6_CR8","doi-asserted-by":"crossref","unstructured":"Esser, P., Rombach, R., Ommer, B.: Taming transformers for high-resolution image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12873\u201312883 (2021)","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Hannemose, M.R., Sundgaard, J.V., Ternov, N.K., Paulsen, R.R., Christensen, A.N.: Was that so hard? estimating human classification difficulty. In: International Workshop on Applications of Medical AI, pp. 88\u201397. Springer (2022)","DOI":"10.1007\/978-3-031-17721-7_10"},{"key":"6_CR10","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local nash equilibrium. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"6_CR11","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR12","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"6_CR13","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"6_CR14","unstructured":"Jin, C., Xie, S.: Fast-dit: fast diffusion models with transformers. https:\/\/github.com\/chuanyangjin\/fast-DiT (2024)"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Konz, N., Chen, Y., Dong, H., Mazurowski, M.A.: Anatomically-controllable medical image generation with segmentation-guided diffusion models. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 88\u201398. Springer (2024)","DOI":"10.1007\/978-3-031-72104-5_9"},{"key":"6_CR16","unstructured":"Li, B., et al.: Llava-next: stronger llms supercharge multimodal capabilities in the wild. https:\/\/llava-vlgithub.io\/blog\/2024-05-10-llava-next-stronger-llms (2024)"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Li, S., Lin, Y., Chen, H., Cheng, K.T.: Iterative online image synthesis via diffusion model for imbalanced classification. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 371\u2013381. Springer (2024)","DOI":"10.1007\/978-3-031-72086-4_35"},{"key":"6_CR18","unstructured":"Li, Z., et\u00a0al.: Hunyuan-dit: a powerful multi-resolution diffusion transformer with fine-grained chinese understanding. arXiv preprint arXiv:2405.08748 (2024)"},{"key":"6_CR19","unstructured":"Liu, S.Y., et al.: Dora: weight-decomposed low-rank adaptation. arXiv preprint arXiv:2402.09353 (2024)"},{"key":"6_CR20","unstructured":"Loshchilov, I.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"6_CR21","unstructured":"Lu, C., Zhou, Y., Bao, F., Chen, J., Li, C., Zhu, J.: Dpm-solver++: fast solver for guided sampling of diffusion probabilistic models. arXiv preprint arXiv:2211.01095 (2022)"},{"key":"6_CR22","unstructured":"Lu, S., et al.: Ovis: structural embedding alignment for multimodal large language model. arXiv preprint arXiv:2405.20797 (2024)"},{"key":"6_CR23","unstructured":"Micikevicius, P., et\u00a0al.: Mixed precision training. arXiv preprint arXiv:1710.03740 (2017)"},{"key":"6_CR24","unstructured":"Podell, D., et al.: Sdxl: improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Pogorelov, K., et\u00a0al.: Kvasir: a multi-class image dataset for computer aided gastrointestinal disease detection. In: Proceedings of the 8th ACM on Multimedia Systems Conference, pp. 164\u2013169 (2017)","DOI":"10.1145\/3083187.3083212"},{"key":"6_CR26","doi-asserted-by":"crossref","unstructured":"Prabhakar, C., et al.: 3d vessel graph generation using denoising diffusion. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 3\u201313. Springer (2024)","DOI":"10.1007\/978-3-031-72120-5_1"},{"issue":"140","key":"6_CR27","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"6_CR28","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"6_CR29","doi-asserted-by":"publisher","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-Net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015, LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"6_CR30","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Yuan, Z., Fang, Z., Huang, Z., Wu, F., Yao, Y.F., Li, Y.: Adapting pre-trained generative model to medical image for data augmentation. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 79\u201389. Springer (2024)","DOI":"10.1007\/978-3-031-72086-4_8"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, J., Huang, J., Jin, S., Lu, S.: Vision-language models for vision tasks: a survey. IEEE Trans. Pattern Anal. Mach. Intell. (2024)","DOI":"10.1109\/TPAMI.2024.3369699"}],"container-title":["Lecture Notes in Computer Science","Deep Generative Models"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-05472-2_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T09:26:25Z","timestamp":1758705985000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-05472-2_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,25]]},"ISBN":["9783032054715","9783032054722"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-05472-2_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,25]]},"assertion":[{"value":"25 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DGM4MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"MICCAI Workshop on Deep Generative Models","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dgm4miccai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/dgm4miccai.github.io\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}