{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T06:12:42Z","timestamp":1758348762752,"version":"3.44.0"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032049360"},{"type":"electronic","value":"9783032049377"}],"license":[{"start":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T00:00:00Z","timestamp":1758326400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T00:00:00Z","timestamp":1758326400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04937-7_58","type":"book-chapter","created":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T05:40:10Z","timestamp":1758260410000},"page":"611-620","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Interpretable Counterfactual Generation via\u00a0Multimodal Autoregression"],"prefix":"10.1007","author":[{"given":"Chenglong","family":"Ma","sequence":"first","affiliation":[]},{"given":"Yuanfeng","family":"Ji","sequence":"additional","affiliation":[]},{"given":"Jin","family":"Ye","sequence":"additional","affiliation":[]},{"given":"Lu","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Ying","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Tianbin","family":"Li","sequence":"additional","affiliation":[]},{"given":"Mingjie","family":"Li","sequence":"additional","affiliation":[]},{"given":"Junjun","family":"He","sequence":"additional","affiliation":[]},{"given":"Hongming","family":"Shan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,20]]},"reference":[{"key":"58_CR1","doi-asserted-by":"crossref","unstructured":"Alaya, M.B., Lang, D.M., Wiestler, B., Schnabel, J.A., Bercea, C.I.: MedEdit: Counterfactual diffusion-based image editing on brain MRI. In: International Workshop on Simulation and Synthesis in Medical Imaging, pp. 167\u2013176 (2024)","DOI":"10.1007\/978-3-031-73281-2_16"},{"key":"58_CR2","unstructured":"Banerjee, S., Lavie, A.: METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization, pp. 65\u201372 (2005)"},{"key":"58_CR3","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: InstructPix2Pix: Learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18392\u201318402 (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"58_CR4","unstructured":"Chambon, P., et al.: Roentgen: vision-language foundation model for chest x-ray generation (2022). arXiv preprint arXiv:2211.12737"},{"key":"58_CR5","unstructured":"Chambon, P., et al.: CheXpert Plus: Augmenting a large chest x-ray dataset with text radiology reports, patient demographics and additional image formats (2024). arXiv preprint arXiv:2405.19538"},{"key":"58_CR6","unstructured":"Chen, X., et al.: Janus-pro: Unified multimodal understanding and generation with data and model scaling (2025). arXiv preprint arXiv:2501.17811"},{"key":"58_CR7","unstructured":"Cohen, J.P., et al.: Gifsplanation via latent shift: a simple autoencoder approach to counterfactual generation for chest x-rays. In: Medical Imaging with Deep Learning, pp. 74\u2013104 (2021)"},{"key":"58_CR8","unstructured":"Cohen, J.P., Hashir, M., Brooks, R., Bertrand, H.: On the limits of cross-domain generalization in automated x-ray prediction. In: Medical Imaging with Deep Learning, pp. 136\u2013155 (2020)"},{"key":"58_CR9","unstructured":"Fang, Y., et al.: Decoding report generators: a cyclic vision-language adapter for counterfactual explanations (2024). arXiv preprint arXiv:2411.05261"},{"key":"58_CR10","doi-asserted-by":"crossref","unstructured":"Fang, Y., et al.: Diffexplainer: Unveiling black box models via counterfactual generation. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 208\u2013218 (2024)","DOI":"10.1007\/978-3-031-72117-5_20"},{"key":"58_CR11","unstructured":"Gu, Y., et al.: Biomedjourney: counterfactual biomedical image generation by instruction-learning from multimodal patient journeys (2023). arXiv preprint arXiv:2310.10765"},{"key":"58_CR12","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local nash equilibrium. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"58_CR13","doi-asserted-by":"crossref","unstructured":"Johnson, A.E., et al.: MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports. Sci. Data 6(1), 317 (2019)","DOI":"10.1038\/s41597-019-0322-0"},{"key":"58_CR14","unstructured":"Kyung, D., Kim, J., Kim, T., Choi, E.: Towards predicting temporal changes in a patient\u2019s chest x-ray images based on electronic health records (2024). arXiv preprint arXiv:2409.07012"},{"issue":"10428","key":"58_CR15","doi-asserted-by":"publisher","first-page":"717","DOI":"10.1016\/S0140-6736(24)00313-1","volume":"403","author":"SI Lee","year":"2024","unstructured":"Lee, S.I., Topol, E.J.: The clinical potential of counterfactual AI models. Lancet 403(10428), 717 (2024)","journal-title":"Lancet"},{"key":"58_CR16","doi-asserted-by":"crossref","unstructured":"Lewis, M., et al.: BART: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 7871\u20137880 (2020)","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"58_CR17","doi-asserted-by":"crossref","unstructured":"Li, M., et al.: Contrastive learning with counterfactual explanations for radiology report generation. In: European Conference on Computer Vision, pp. 162\u2013180 (2024)","DOI":"10.1007\/978-3-031-72775-7_10"},{"key":"58_CR18","unstructured":"Liang, K., et al.: Pie: Simulating disease progression via progressive image editing (2023). arXiv preprint arXiv:2309.11745"},{"key":"58_CR19","unstructured":"Lin, C.Y.: Rouge: A package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381 (2004)"},{"key":"58_CR20","doi-asserted-by":"crossref","unstructured":"Liu, S., Wang, F., Ren, Z., Lian, C., Ma, J.: Controllable counterfactual generation for interpretable medical image classification. In: Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2024, pp. 143\u2013152 (2024)","DOI":"10.1007\/978-3-031-72117-5_14"},{"key":"58_CR21","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2019)"},{"key":"58_CR22","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"58_CR23","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"58_CR24","doi-asserted-by":"crossref","unstructured":"Shentu, J., Al\u00a0Moubayed, N.: CXR-IRGen: an integrated vision and language model for the generation of clinically accurate chest x-ray image-report pairs. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5212\u20135221 (2024)","DOI":"10.1109\/WACV57701.2024.00513"},{"key":"58_CR25","unstructured":"Shi, W., et al.: Llamafusion: Adapting pretrained language models for multimodal generation (2024). arXiv preprint arXiv:2412.15188"},{"key":"58_CR26","unstructured":"Team, C.: Chameleon: Mixed-modal early-fusion foundation models (2024). arXiv preprint arXiv:2405.09818"},{"key":"58_CR27","unstructured":"Wang, X., et\u00a0al.: Emu3: Next-token prediction is all you need (2024). arXiv preprint arXiv:2409.18869"},{"key":"58_CR28","unstructured":"Zhang, S., et\u00a0al.: Biomedclip: a multimodal biomedical foundation model pretrained from fifteen million scientific image-text pairs (2023). arXiv preprint arXiv:2303.00915"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04937-7_58","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T05:40:26Z","timestamp":1758260426000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04937-7_58"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,20]]},"ISBN":["9783032049360","9783032049377"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04937-7_58","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,20]]},"assertion":[{"value":"20 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}