{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:28:49Z","timestamp":1775579329312,"version":"3.50.1"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031439988","type":"print"},{"value":"9783031439995","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43999-5_14","type":"book-chapter","created":{"date-parts":[[2023,9,30]],"date-time":"2023-09-30T23:08:57Z","timestamp":1696115337000},"page":"142-152","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":26,"title":["Feature-Conditioned Cascaded Video Diffusion Models for\u00a0Precise Echocardiogram Synthesis"],"prefix":"10.1007","author":[{"given":"Hadrien","family":"Reynaud","sequence":"first","affiliation":[]},{"given":"Mengyun","family":"Qiao","sequence":"additional","affiliation":[]},{"given":"Mischa","family":"Dombrowski","sequence":"additional","affiliation":[]},{"given":"Thomas","family":"Day","sequence":"additional","affiliation":[]},{"given":"Reza","family":"Razavi","sequence":"additional","affiliation":[]},{"given":"Alberto","family":"Gomez","sequence":"additional","affiliation":[]},{"given":"Paul","family":"Leeson","sequence":"additional","affiliation":[]},{"given":"Bernhard","family":"Kainz","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,1]]},"reference":[{"key":"14_CR1","unstructured":"Babaeizadeh, M., Finn, C., Erhan, D., Campbell, R.H., Levine, S.: Stochastic variational video prediction. arXiv:1710.11252 (2018)"},{"key":"14_CR2","unstructured":"Babaeizadeh, M., Saffar, M.T., Nair, S., Levine, S., Finn, C., Erhan, D.: FitVid: overfitting in pixel-level video prediction. arXiv:2106.13195 (2021)"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Esser, P., Chiu, J., Atighehchian, P., Granskog, J., Germanidis, A.: Structure and content-guided video synthesis with diffusion models. arXiv:2302.03011 (2023)","DOI":"10.1109\/ICCV51070.2023.00675"},{"key":"14_CR4","unstructured":"Finn, C., Goodfellow, I., Levine, S.: Unsupervised learning for physical interaction through video prediction. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"key":"14_CR5","unstructured":"Gupta, A., Tian, S., Zhang, Y., Wu, J., Mart\u00edn-Mart\u00edn, R., Fei-Fei, L.: MaskViT: masked visual pre-training for video prediction. arXiv:2206.11894 (2022)"},{"key":"14_CR6","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local Nash equilibrium. arXiv:1706.08500 (2018)"},{"key":"14_CR7","unstructured":"Ho, J., et al.: Imagen video: high definition video generation with diffusion models (2022). arXiv:2210.02303"},{"key":"14_CR8","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems, vol. 33, pp. 6840\u20136851 (2020)"},{"key":"14_CR9","first-page":"1","volume":"23","author":"J Ho","year":"2022","unstructured":"Ho, J., Saharia, C., Chan, W., Fleet, D.J., Norouzi, M., Salimans, T.: Cascaded diffusion models for high fidelity image generation. J. Mach. Learn. Res. 23, 1\u201333 (2022)","journal-title":"J. Mach. Learn. Res."},{"key":"14_CR10","unstructured":"Ho, J., Salimans, T., Gritsenko, A., Chan, W., Norouzi, M., Fleet, D.J.: Video diffusion models (2022). arXiv:2204.03458"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Jensen, J.: Simulation of advanced ultrasound systems using Field II. In: 2004 2nd IEEE International Symposium on Biomedical Imaging: Nano to Macro (IEEE Cat No. 04EX821), pp. 636\u2013639, vol. 1 (2004)","DOI":"10.1109\/ISBI.2004.1398618"},{"key":"14_CR12","unstructured":"Kalchbrenner, N., et al.: Video pixel networks. In: ICML, pp. 1771\u20131779 (2017)"},{"key":"14_CR13","unstructured":"Karras, T., Aittala, M., Aila, T., Laine, S.: Elucidating the design space of diffusion-based generative models. arXiv:2206.00364 (2022)"},{"key":"14_CR14","unstructured":"Kumar, M., et al.: VideoFlow: a conditional flow-based model for stochastic video generation. arXiv:1903.01434 (2020)"},{"key":"14_CR15","first-page":"1113","volume":"24","author":"M Ledesma-Carbayo","year":"2005","unstructured":"Ledesma-Carbayo, M., et al.: Spatio-temporal nonrigid registration for ultrasound cardiac motion estimation. IEEE TMI 24, 1113\u20131126 (2005)","journal-title":"IEEE TMI"},{"key":"14_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2022.102461","volume":"79","author":"J Liang","year":"2022","unstructured":"Liang, J., et al.: Sketch guided and progressive growing GAN for realistic and editable ultrasound image synthesis. Med. Image Anal. 79, 102461 (2022)","journal-title":"Med. Image Anal."},{"key":"14_CR17","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"310","DOI":"10.1007\/978-3-031-16440-8_30","volume-title":"MICCAI 2022","author":"J Liang","year":"2022","unstructured":"Liang, J., et al.: Weakly-supervised high-fidelity ultrasound video synthesis with feature decoupling. In: Wang, L., Dou, Q., Fletcher, P.T., Speidel, S., Li, S. (eds.) MICCAI 2022. LNCS, vol. 13434, pp. 310\u2013319. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-16440-8_30"},{"key":"14_CR18","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"360","DOI":"10.1007\/978-3-031-16440-8_35","volume-title":"MICCAI 2022","author":"M Mokhtari","year":"2022","unstructured":"Mokhtari, M., Tsang, T., Abolmaesumi, P., Liao, R.: EchoGNN: explainable ejection fraction estimation with graph neural networks. In: Wang, L., Dou, Q., Fletcher, P.T., Speidel, S., Li, S. (eds.) MICCAI 2022. LNCS, vol. 13434, pp. 360\u2013369. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-16440-8_35"},{"key":"14_CR19","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1038\/s41586-020-2145-8","volume":"580","author":"D Ouyang","year":"2020","unstructured":"Ouyang, D., et al.: Video-based AI for beat-to-beat assessment of cardiac function. Nature 580, 252\u2013256 (2020)","journal-title":"Nature"},{"key":"14_CR20","unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. arXiv:2102.12092 (2021)"},{"key":"14_CR21","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"599","DOI":"10.1007\/978-3-031-16452-1_57","volume-title":"MICCAI 2022","author":"H Reynaud","year":"2022","unstructured":"Reynaud, H., et al.: D\u2019ARTAGNAN: counterfactual video generation. In: Wang, L., Dou, Q., Fletcher, P.T., Speidel, S., Li, S. (eds.) MICCAI 2022. LNCS, vol. 13438, pp. 599\u2013609. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-16452-1_57"},{"key":"14_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"495","DOI":"10.1007\/978-3-030-87231-1_48","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2021","author":"H Reynaud","year":"2021","unstructured":"Reynaud, H., Vlontzos, A., Hou, B., Beqiri, A., Leeson, P., Kainz, B.: Ultrasound video transformers for\u00a0cardiac ejection fraction estimation. In: de Bruijne, M., et al. (eds.) MICCAI 2021. LNCS, vol. 12906, pp. 495\u2013505. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-87231-1_48"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. arXiv:2112.10752 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. arXiv:2205.11487 (2022)","DOI":"10.1145\/3528233.3530757"},{"key":"14_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"510","DOI":"10.1007\/978-3-319-24571-3_61","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"M Salehi","year":"2015","unstructured":"Salehi, M., Ahmadi, S.-A., Prevost, R., Navab, N., Wein, W.: Patient-specific 3D ultrasound simulation based on convolutional ray-tracing and appearance optimization. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9350, pp. 510\u2013518. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24571-3_61"},{"key":"14_CR26","unstructured":"Salimans, T., Ho, J.: Progressive distillation for fast sampling of diffusion models. arXiv:2202.00512 (2022)"},{"key":"14_CR27","doi-asserted-by":"publisher","first-page":"4902","DOI":"10.1118\/1.3480985","volume":"37","author":"WP Segars","year":"2010","unstructured":"Segars, W.P., Sturgeon, G., Mendonca, S., Grimes, J., Tsui, B.M.W.: 4D XCAT phantom for multimodality imaging research. Med. Phys. 37, 4902\u20134915 (2010)","journal-title":"Med. Phys."},{"key":"14_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"734","DOI":"10.1007\/978-3-540-85990-1_88","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2008","author":"R Shams","year":"2008","unstructured":"Shams, R., Hartley, R., Navab, N.: Real-time simulation of medical ultrasound from CT images. In: Metaxas, D., Axel, L., Fichtinger, G., Sz\u00e9kely, G. (eds.) MICCAI 2008. LNCS, vol. 5242, pp. 734\u2013741. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-85990-1_88"},{"key":"14_CR29","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556 (2015)"},{"key":"14_CR30","unstructured":"Singer, U., et al.: Make-a-video: text-to-video generation without text-video data. arXiv:2209.14792 (2022)"},{"key":"14_CR31","unstructured":"Sohl-Dickstein, J., Weiss, E.A., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. arXiv:1503.03585 (2015)"},{"key":"14_CR32","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv:2010.02502 (2022)"},{"key":"14_CR33","doi-asserted-by":"crossref","unstructured":"Song, Y., Zhu, J., Li, D., Wang, X., Qi, H.: Talking face generation by conditional recurrent adversarial network. arXiv:1804.04786 (2019)","DOI":"10.24963\/ijcai.2019\/129"},{"key":"14_CR34","doi-asserted-by":"publisher","first-page":"106147","DOI":"10.1109\/ACCESS.2020.3000666","volume":"8","author":"L Teng","year":"2020","unstructured":"Teng, L., Fu, Z., Yao, Y.: Interactive translation in echocardiography training system with enhanced cycle-GAN. IEEE Access 8, 106147\u2013106156 (2020)","journal-title":"IEEE Access"},{"key":"14_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"659","DOI":"10.1007\/978-3-030-87237-3_63","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2021","author":"D Tomar","year":"2021","unstructured":"Tomar, D., Zhang, L., Portenier, T., Goksel, O.: Content-preserving unpaired translation from simulated to realistic ultrasound images. In: de Bruijne, M., et al. (eds.) MICCAI 2021. LNCS, vol. 12908, pp. 659\u2013669. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-87237-3_63"},{"key":"14_CR36","unstructured":"Unterthiner, T., Steenkiste, S.V., Kurach, K., Marinier, R., Michalski, M., Gelly, S.: FVD: a new metric for video generation. In: ICLR 2022 Workshop: Deep Generative Models for Highly Structured Data (2019)"},{"key":"14_CR37","unstructured":"Villegas, R., et al.: Phenaki: variable length video generation from open domain textual description. arXiv:2210.02399 (2022)"},{"key":"14_CR38","doi-asserted-by":"crossref","unstructured":"Yang, R., Srivastava, P., Mandt, S.: Diffusion probabilistic modeling for video generation. arXiv:2203.09481 (2022)","DOI":"10.3390\/e25101469"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2023"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43999-5_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T19:29:07Z","timestamp":1730230147000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43999-5_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031439988","9783031439995"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43999-5_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"1 October 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vancouver, BC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canada","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2023\/en\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2250","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"730","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"32% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}