{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T09:51:47Z","timestamp":1758448307043,"version":"3.44.0"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032051134"},{"type":"electronic","value":"9783032051141"}],"license":[{"start":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T00:00:00Z","timestamp":1758412800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T00:00:00Z","timestamp":1758412800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-05114-1_26","type":"book-chapter","created":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T14:10:20Z","timestamp":1758377420000},"page":"267-277","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["FEAT: Full-Dimensional Efficient Attention Transformer for\u00a0Medical Video Generation"],"prefix":"10.1007","author":[{"given":"Huihan","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiwen","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hui","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dan","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bingzheng","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,9,21]]},"reference":[{"key":"26_CR1","unstructured":"Dorjsembe, Z., Odonchimed, S., Xiao, F.: Three-dimensional medical image synthesis with denoising diffusion probabilistic models. In: Medical Imaging with Deep Learning (2022)"},{"key":"26_CR2","unstructured":"Wang, Z., Zhang, L., Wang, L., Zhang, Z.: Soft masked mamba diffusion model for CT to MRI conversion. arXiv preprint arXiv:2406.15910 (2024)"},{"key":"26_CR3","doi-asserted-by":"crossref","unstructured":"Liu, J., et al.: DOLCE: a model-based probabilistic diffusion framework for limited-angle CT reconstruction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10498\u201310508 (2023)","DOI":"10.1109\/ICCV51070.2023.00963"},{"key":"26_CR4","unstructured":"Singer, U., et\u00a0al.: Make-a-video: text-to-video generation without text-video data. arXiv preprint arXiv:2209.14792 (2022)"},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Chen, H., et al.: VideoCrafter2: overcoming data limitations for high-quality video diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7310\u20137320 (2024)","DOI":"10.1109\/CVPR52733.2024.00698"},{"key":"26_CR6","doi-asserted-by":"crossref","unstructured":"Li, C., et al.: Endora: video generation models as endoscopy simulators. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 230\u2013240. Springer (2024)","DOI":"10.1007\/978-3-031-72089-5_22"},{"key":"26_CR7","doi-asserted-by":"crossref","unstructured":"Xing, J., et\u00a0al.: Make-your-video: customized video generation using textual and structural guidance. IEEE Trans. Vis. Comput. Graph. (2024)","DOI":"10.1109\/TVCG.2024.3365804"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Zamir, S.W., Arora, A., Khan, S., Hayat, M., Khan, F.S., Yang, M.H.: Restormer: efficient transformer for high-resolution image restoration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5728\u20135739 (2022)","DOI":"10.1109\/CVPR52688.2022.00564"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Peebles, W., Xie, S.: Scalable diffusion models with transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4195\u20134205 (2023)","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"26_CR10","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"26_CR11","unstructured":"Peng, B., et\u00a0al.: RWKV: reinventing RNNs for the transformer era. arXiv preprint arXiv:2305.13048 (2023)"},{"key":"26_CR12","unstructured":"Duan, Y., et al.: Vision-RWKV: efficient and scalable visual perception with RWKV-like architectures. arXiv preprint arXiv:2403.02308 (2024)"},{"key":"26_CR13","doi-asserted-by":"crossref","unstructured":"Yang, Z., Li, J., Zhang, H., Zhao, D., Wei, B., Xu, Y.: Restore-RWKV: efficient and effective medical image restoration with RWKV. arXiv preprint arXiv:2407.11087 (2024)","DOI":"10.1109\/JBHI.2025.3588555"},{"key":"26_CR14","unstructured":"Shen, Z., Zhang, M., Zhao, H., Yi, S., Li, H.: Efficient attention: attention with linear complexities. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 3531\u20133539 (2021)"},{"key":"26_CR15","first-page":"21696","volume":"34","author":"D Kingma","year":"2021","unstructured":"Kingma, D., Salimans, T., Poole, B., Ho, J.: Variational diffusion models. Adv. Neural. Inf. Process. Syst. 34, 21696\u201321707 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR16","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"26_CR17","unstructured":"He, Y., Yang, T., Zhang, Y., Shan, Y., Chen, Q.: Latent video diffusion models for high-fidelity long video generation. arXiv preprint arXiv:2211.13221 (2022)"},{"key":"26_CR18","doi-asserted-by":"crossref","unstructured":"Chollet, F.: Xception: deep learning with depthwise separable convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1251\u20131258 (2017)","DOI":"10.1109\/CVPR.2017.195"},{"key":"26_CR19","unstructured":"Zhou, Z., Wu, T., Jiang, Z., Lan, Z.: Value residual learning for alleviating attention concentration in transformers. arXiv preprint arXiv:2410.17897 (2024)"},{"issue":"9","key":"26_CR20","doi-asserted-by":"publisher","first-page":"2051","DOI":"10.1109\/TMI.2016.2547947","volume":"35","author":"P Mesejo","year":"2016","unstructured":"Mesejo, P., et al.: Computer-aided classification of gastrointestinal lesions in regular colonoscopy. IEEE Trans. Med. Imaging 35(9), 2051\u20132063 (2016)","journal-title":"IEEE Trans. Med. Imaging"},{"issue":"1","key":"26_CR21","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1038\/s41597-020-00622-y","volume":"7","author":"H Borgli","year":"2020","unstructured":"Borgli, H., et al.: HyperKvasir, a comprehensive multi-class image and video dataset for gastrointestinal endoscopy. Sci. Data 7(1), 283 (2020)","journal-title":"Sci. Data"},{"key":"26_CR22","unstructured":"Parmar, G., Zhang, R., Zhu, J.Y.: On buggy resizing libraries and surprising subtleties in fid calculation. arXiv preprint arXiv:2104.11222 (2021). 5(14), 6"},{"key":"26_CR23","doi-asserted-by":"crossref","unstructured":"Saito, M., Matsumoto, E., Saito, S.: Temporal generative adversarial nets with singular value clipping. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2830\u20132839 (2017)","DOI":"10.1109\/ICCV.2017.308"},{"key":"26_CR24","unstructured":"Unterthiner, T., Van\u00a0Steenkiste, S., Kurach, K., Marinier, R., Michalski, M., Gelly, S.: Towards accurate generative models of video: a new metric & challenges. arXiv preprint arXiv:1812.01717 (2018)"},{"key":"26_CR25","doi-asserted-by":"crossref","unstructured":"Ge, S., Mahapatra, A., Parmar, G., Zhu, J.Y., Huang, J.B.: On the content bias in Fr\u00e9chet video distance. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7277\u20137288 (2024)","DOI":"10.1109\/CVPR52733.2024.00695"},{"key":"26_CR26","doi-asserted-by":"crossref","unstructured":"Skorokhodov, I., Tulyakov, S., Elhoseiny, M.: StyleGAN-V: a continuous video generator with the price, image quality and perks of stylegan2. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3626\u20133636 (2022)","DOI":"10.1109\/CVPR52688.2022.00361"},{"key":"26_CR27","doi-asserted-by":"crossref","unstructured":"Shen, X., Li, X., Elhoseiny, M.: MostGAN-V: video generation with temporal motion styles. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5652\u20135661 (2023)","DOI":"10.1109\/CVPR52729.2023.00547"},{"key":"26_CR28","unstructured":"Blattmann, A., et\u00a0al.: Stable video diffusion: scaling latent video diffusion models to large datasets. arXiv preprint arXiv:2311.15127 (2023)"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Tian, Y., et al.: Contrastive transformer-based multiple instance learning for weakly supervised polyp frame detection. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 88\u201398. Springer (2022)","DOI":"10.1007\/978-3-031-16437-8_9"},{"key":"26_CR30","first-page":"596","volume":"33","author":"K Sohn","year":"2020","unstructured":"Sohn, K., et al.: FixMatch: simplifying semi-supervised learning with consistency and confidence. Adv. Neural. Inf. Process. Syst. 33, 596\u2013608 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-05114-1_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T14:10:28Z","timestamp":1758377428000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-05114-1_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,21]]},"ISBN":["9783032051134","9783032051141"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-05114-1_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,21]]},"assertion":[{"value":"21 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"We have no conflicts of interest to disclose.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}