{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T18:36:53Z","timestamp":1771958213111,"version":"3.50.1"},"publisher-location":"Cham","reference-count":60,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031918551","type":"print"},{"value":"9783031918568","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91856-8_10","type":"book-chapter","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T11:17:47Z","timestamp":1747999067000},"page":"160-177","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["AIM 2024 Challenge on\u00a0Video Super-Resolution Quality Assessment: Methods and\u00a0Results"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8294-0770","authenticated-orcid":false,"given":"Ivan","family":"Molodetskikh","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6124-885X","authenticated-orcid":false,"given":"Artem","family":"Borisov","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8893-9340","authenticated-orcid":false,"given":"Dmitriy","family":"Vatolin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1478-0402","authenticated-orcid":false,"given":"Radu","family":"Timofte","sequence":"additional","affiliation":[]},{"given":"Jianzhao","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Tianwu","family":"Zhi","sequence":"additional","affiliation":[]},{"given":"Yabin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jingwen","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Yiting","family":"Liao","sequence":"additional","affiliation":[]},{"given":"Qing","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Ao-Xiang","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Peng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Haibo","family":"Lei","sequence":"additional","affiliation":[]},{"given":"Linyan","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Yaqing","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yuqin","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Weixia","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yinan","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Ziheng","family":"Jia","sequence":"additional","affiliation":[]},{"given":"Yuxin","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Xiongkuo","family":"Min","sequence":"additional","affiliation":[]},{"given":"Guangtao","family":"Zhai","sequence":"additional","affiliation":[]},{"given":"Weihua","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Yupeng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Hong","family":"Yi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"10_CR1","unstructured":"MSU video codecs comparisons (2022). http:\/\/compression.ru\/video\/codec_comparison\/index_en.html"},{"key":"10_CR2","unstructured":"MSU video quality measurement tool (2022). http:\/\/www.compression.ru\/video\/quality_measure\/video_measurement_tool.html"},{"key":"10_CR3","unstructured":"Papers with code: Vid4 - 4x upscaling benchmark (video super-resolution) (2024). https:\/\/paperswithcode.com\/sota\/video-super-resolution-on-vid4-4x-upscaling"},{"key":"10_CR4","unstructured":"Antsiferova, A., Lavrushkin, S., Smirnov, M., Gushchin, A., Vatolin, D., Kulikov, D.: Video compression dataset and benchmark of learning-based video-quality metrics. In: Koyejo, S., Mohamed, S., Agarwal, A., Belgrave, D., Cho, K., Oh, A. (eds.) Advances in Neural Information Processing Systems. vol.\u00a035, pp. 13814\u201313825. Curran Associates, Inc. (2022). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/59ac9f01ea2f701310f3d42037546e4a-Paper-Datasets_and_Benchmarks.pdf"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Bogatyrev, E., Molodetskikh, I., Vatolin, D.: Compressed video quality assessment for super-resolution: a benchmark and a quality metric. arXiv preprint arXiv:2305.04844 (2023)","DOI":"10.20948\/prepr-2022-81"},{"key":"10_CR6","unstructured":"Borisov, A., Bogatyrev, E., Kashkarov, E., Vatolin, D.: MSU video super-resolution quality metrics benchmark (2023). https:\/\/videoprocessing.ai\/benchmarks\/super-resolution-metrics.html"},{"issue":"3\/4","key":"10_CR7","first-page":"324","volume":"39","author":"RA Bradley","year":"1952","unstructured":"Bradley, R.A., Terry, M.E.: Rank analysis of incomplete block designs: I. Method Paired Comparisons. Biometrika 39(3\/4), 324\u2013345 (1952)","journal-title":"Method Paired Comparisons. Biometrika"},{"key":"10_CR8","unstructured":"Cao, Y., Min, X., Gao, Y., Sun, W., Lin, W., Zhai, G.: UNQA: Unified no-reference quality assessment for audio, image, video, and audio-visual content. arXiv preprint arXiv:2407.19704 (2024)"},{"key":"10_CR9","doi-asserted-by":"crossref","unstructured":"Chan, K.C., Zhou, S., Xu, X., Loy, C.C.: BasicVSR++: improving video super-resolution with enhanced propagation and alignment. In: IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.00588"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Chen, C., et al.: Topiq: A top-down approach from semantics to distortions for image quality assessment. arXiv preprint arXiv:2308.03060 (2023)","DOI":"10.1109\/TIP.2024.3378466"},{"issue":"1","key":"10_CR11","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1109\/TIP.2010.2053549","volume":"20","author":"A Ciancio","year":"2010","unstructured":"Ciancio, A., da Silva, E.A., Said, A., Samadani, R., Obrador, P., et al.: No-reference blur assessment of digital pictures based on multifeature classifiers. IEEE Trans. Image Process. 20(1), 64\u201375 (2010)","journal-title":"IEEE Trans. Image Process."},{"key":"10_CR12","unstructured":"Conde, M.V., et\u00a0al.: AIM 2024 challenge on raw burst alignment via optical flow estimation. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops (2024)"},{"key":"10_CR13","unstructured":"Conde, M.V., Lei, Z., Li, W., Katsavounidis, I., Timofte, R., et\u00a0al.: AIM 2024 challenge on efficient video super-resolution for av1 compressed content. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops (2024)"},{"key":"10_CR14","unstructured":"Conde, M.V., Vasluianu, F.A., Xiong, J., Ye, W., Ranjan, R., Timofte, R., et\u00a0al.: Compressed depth map super-resolution and restoration: AIM 2024 challenge results. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops (2024)"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Fang, Y., Zhu, H., Zeng, Y., Ma, K., Wang, Z.: Perceptual quality assessment of smartphone photography. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3677\u20133686 (2020)","DOI":"10.1109\/CVPR42600.2020.00373"},{"key":"10_CR16","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"issue":"1","key":"10_CR17","doi-asserted-by":"publisher","first-page":"372","DOI":"10.1109\/TIP.2015.2500021","volume":"25","author":"D Ghadiyaram","year":"2015","unstructured":"Ghadiyaram, D., Bovik, A.C.: Massive online crowdsourced study of subjective and objective picture quality. IEEE Trans. Image Process. 25(1), 372\u2013387 (2015)","journal-title":"IEEE Trans. Image Process."},{"key":"10_CR18","unstructured":"Hosu, V., Conde, M.V., Timofte, R., Agnolucci, L., Zadtootaghaj, S., Barman, N., et\u00a0al.: AIM 2024 challenge on uhd blind photo quality assessment. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops (2024)"},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Hosu, V., et al.: The konstanz natural video database (konvid-1k). In: Proceedings of the International Conference on Quality of Multimedia Experience, pp.\u00a01\u20136 (2017)","DOI":"10.1109\/QoMEX.2017.7965673"},{"key":"10_CR20","doi-asserted-by":"publisher","first-page":"4041","DOI":"10.1109\/TIP.2020.2967829","volume":"29","author":"V Hosu","year":"2020","unstructured":"Hosu, V., Lin, H., Sziranyi, T., Saupe, D.: Koniq-10k: an ecologically valid database for deep learning of blind image quality assessment. IEEE Trans. Image Process. 29, 4041\u20134056 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"Ji, X., Cao, Y., Tai, Y., Wang, C., Li, J., Huang, F.: Real-world super-resolution via kernel estimation and noise injection. In: The IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops (June 2020)","DOI":"10.1109\/CVPRW50498.2020.00241"},{"key":"10_CR22","unstructured":"Kai, D., Lu, J., Zhang, Y., Sun, X.: EvTexture: event-driven Texture Enhancement for Video Super-Resolution. In: International Conference on Machine Learning. PMLR (2024)"},{"key":"10_CR23","unstructured":"Karetin, N., Molodetskikh, I., Vatolin, D.: MSU video upscalers benchmark: Quality enhancement (2023). https:\/\/videoprocessing.ai\/benchmarks\/video-upscalers.html"},{"key":"10_CR24","doi-asserted-by":"publisher","unstructured":"Kirillova., A., Lyapustin., E., Antsiferova., A., Vatolin., D.: Erqa: edge-restoration quality assessment for video super-resolution. In: Proceedings of the 17th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications - Volume 4: VISAPP, pp. 315\u2013322. INSTICC, SciTePress (2022). https:\/\/doi.org\/10.5220\/0010780900003124","DOI":"10.5220\/0010780900003124"},{"key":"10_CR25","unstructured":"Li, H., Chen, X., Dong, J., Tang, J., Pan, J.: Collaborative feedback discriminative propagation for video super-resolution. arXiv preprint arXiv:2404.04745 (2024)"},{"key":"10_CR26","doi-asserted-by":"crossref","unstructured":"Liang, J., Cao, J., Sun, G., Zhang, K., Van\u00a0Gool, L., Timofte, R.: Swinir: Image restoration using swin transformer. arXiv preprint arXiv:2108.10257 (2021)","DOI":"10.1109\/ICCVW54120.2021.00210"},{"key":"10_CR27","unstructured":"Liang, J., et al.: Recurrent video restoration transformer with guided deformable attention. arXiv preprint arXiv:2206.02146 (2022)"},{"key":"10_CR28","doi-asserted-by":"crossref","unstructured":"Liu, J., Li, X., Peng, Y., Yu, T., Chen, Z.: Swiniqa: Learned swin distance for compressed image quality assessment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1795\u20131799 (2022)","DOI":"10.1109\/CVPRW56347.2022.00194"},{"key":"10_CR29","doi-asserted-by":"crossref","unstructured":"Liu, X., Van De\u00a0Weijer, J., Bagdanov, A.D.: Rankiqa: Learning from rankings for no-reference image quality assessment. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1040\u20131049 (2017)","DOI":"10.1109\/ICCV.2017.118"},{"key":"10_CR30","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"10_CR31","doi-asserted-by":"publisher","unstructured":"Ma, C., Yang, C.Y., Yang, X., Yang, M.H.: Learning a no-reference quality metric for single-image super-resolution. In: 2017 Computer Vision and Image Understanding. vol.\u00a0158, pp. 1\u201316 (2017).https:\/\/doi.org\/10.1016\/j.cviu.2016.12.009","DOI":"10.1016\/j.cviu.2016.12.009"},{"key":"10_CR32","unstructured":"Moskalenko, A., Bryntsev, A., Vatolin, D.S., Timofte, R., et\u00a0al.: AIM 2024 challenge on video saliency prediction: Methods and results. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops (2024)"},{"key":"10_CR33","unstructured":"Nazarczuk, M., Catley-Chandar, S., Tanay, T., Shaw, R., P\u00e9rez-Pellitero, E., Timofte, R., et\u00a0al.: AIM 2024 sparse neural rendering challenge: Methods and results. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops (2024)"},{"key":"10_CR34","unstructured":"Nazarczuk, M., Tanay, T., Catley-Chandar, S., Shaw, R., Timofte, R., P\u00e9rez-Pellitero, E.: AIM 2024 sparse neural rendering challenge: dataset and benchmark. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops (2024)"},{"key":"10_CR35","doi-asserted-by":"crossref","unstructured":"Park, S.H., Moon, Y.S., Cho, N.I.: Perception-oriented single image super-resolution using optimal objective estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1725\u20131735 (2023)","DOI":"10.1109\/CVPR52729.2023.00172"},{"key":"10_CR36","doi-asserted-by":"crossref","unstructured":"Prashnani, E., Cai, H., Mostofi, Y., Sen, P.: Pieapp: perceptual image-error assessment through pairwise preference. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (June 2018)","DOI":"10.1109\/CVPR.2018.00194"},{"issue":"12","key":"10_CR37","doi-asserted-by":"publisher","first-page":"1592","DOI":"10.1093\/bioinformatics\/btg197","volume":"19","author":"M Sammeth","year":"2003","unstructured":"Sammeth, M., Rothg\u00e4nger, J., Esser, W., Albert, J., Stoye, J., Harmsen, D.: Qalign: quality-based multiple alignments with dynamic phylogenetic analysis. Bioinformatics 19(12), 1592\u20131593 (2003)","journal-title":"Bioinformatics"},{"issue":"2","key":"10_CR38","doi-asserted-by":"publisher","first-page":"612","DOI":"10.1109\/TIP.2018.2869673","volume":"28","author":"Z Sinno","year":"2018","unstructured":"Sinno, Z., Bovik, A.C.: Large-scale study of perceptual video quality. IEEE Trans. Image Process. 28(2), 612\u2013627 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"10_CR39","unstructured":"Smirnov, M., Gushchin, A., Antsiferova, A., Vatolin, D.S., Timofte, R., et\u00a0al.: AIM 2024 challenge on compressed video quality assessment: Methods and results. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops (2024)"},{"key":"10_CR40","doi-asserted-by":"crossref","unstructured":"Spearman, C.: The proof and measurement of association between two things. Am. J. Psychol 15(1), 72\u2013101 (1904). http:\/\/www.jstor.org\/stable\/1412159","DOI":"10.2307\/1412159"},{"key":"10_CR41","doi-asserted-by":"crossref","unstructured":"Su, S., et al.: Blindly assess image quality in the wild guided by a self-adaptive hyper network. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (June 2020)","DOI":"10.1109\/CVPR42600.2020.00372"},{"key":"10_CR42","unstructured":"Sun, W., Wu, H., Zhang, Z., Jia, J., Zhang, Z., Cao, L., Chen, Q., Min, X., Lin, W., Zhai, G.: Enhancing blind video quality assessment with rich quality-aware features. arXiv preprint arXiv:2405.08745 (2024)"},{"key":"10_CR43","unstructured":"TopazLabs: Topaz video AI (2020). https:\/\/www.topazlabs.com\/topaz-video-ai"},{"key":"10_CR44","doi-asserted-by":"crossref","unstructured":"Wang, J., Chan, K.C., Loy, C.C.: Exploring CLIP for assessing the look and feel of images. In: AAAI (2023)","DOI":"10.1609\/aaai.v37i2.25353"},{"key":"10_CR45","doi-asserted-by":"crossref","unstructured":"Wang, X., Xie, L., Dong, C., Shan, Y.: Real-ESRGAN: training real-world blind super-resolution with pure synthetic data. In: International Conference on Computer Vision Workshops (ICCVW) (2021)","DOI":"10.1109\/ICCVW54120.2021.00217"},{"key":"10_CR46","doi-asserted-by":"crossref","unstructured":"Wang, Y., Inguva, S., Adsumilli, B.: YouTube UGC dataset for video compression research. In: Proceedings of the International Workshop on Multimedia Signal Processing, pp.\u00a01\u20135 (2019)","DOI":"10.1109\/MMSP.2019.8901772"},{"key":"10_CR47","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: Rich features for perceptual quality assessment of UGC videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13435\u201313444 (2021)","DOI":"10.1109\/CVPR46437.2021.01323"},{"issue":"4","key":"10_CR48","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A., Sheikh, H., Simoncelli, E.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004). https:\/\/doi.org\/10.1109\/TIP.2003.819861","journal-title":"IEEE Trans. Image Process."},{"key":"10_CR49","doi-asserted-by":"crossref","unstructured":"Woo, S., et al.: Convnext v2: co-designing and scaling convnets with masked autoencoders. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 16133\u201316142 (June 2023)","DOI":"10.1109\/CVPR52729.2023.01548"},{"key":"10_CR50","doi-asserted-by":"crossref","unstructured":"Wu, H., et al.: Fast-vqa: efficient end-to-end video quality assessment with fragment sampling. In: European Conference on Computer Vision, pp. 538\u2013554. Springer (2022)","DOI":"10.1007\/978-3-031-20068-7_31"},{"key":"10_CR51","unstructured":"Wu, H., et al.: Q-align: Teaching LMMs for visual scoring via discrete text-defined levels. arXiv preprint arXiv:2312.17090 (2023)"},{"key":"10_CR52","doi-asserted-by":"crossref","unstructured":"Xu, K., Yu, Z., Wang, X., Mi, M.B., Yao, A.: Enhancing video super-resolution via implicit resampling-based alignment (2024). https:\/\/arxiv.org\/abs\/2305.00163","DOI":"10.1109\/CVPR52733.2024.00246"},{"key":"10_CR53","doi-asserted-by":"crossref","unstructured":"Ye, Q., et al.: mplug-owl2: revolutionizing multi-modal large language model with modality collaboration (2023)","DOI":"10.1109\/CVPR52733.2024.01239"},{"key":"10_CR54","doi-asserted-by":"crossref","unstructured":"Ying, Z., Mandal, M., Ghadiyaram, D., Bovik, A.: Patch-vq:\u2019patching up\u2019the video quality problem. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14019\u201314029 (2021)","DOI":"10.1109\/CVPR46437.2021.01380"},{"issue":"8","key":"10_CR55","doi-asserted-by":"publisher","first-page":"4749","DOI":"10.1109\/TCYB.2023.3338615","volume":"54","author":"AX Zhang","year":"2024","unstructured":"Zhang, A.X., Wang, Y.G., Tang, W., Li, L., Kwong, S.: A spatial-temporal video quality assessment method via comprehensive hvs simulation. IEEE Trans. Cybern. 54(8), 4749\u20134762 (2024). https:\/\/doi.org\/10.1109\/TCYB.2023.3338615","journal-title":"IEEE Trans. Cybern."},{"key":"10_CR56","doi-asserted-by":"publisher","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00068","DOI":"10.1109\/CVPR.2018.00068"},{"issue":"1","key":"10_CR57","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1109\/TCSVT.2018.2886771","volume":"30","author":"W Zhang","year":"2020","unstructured":"Zhang, W., Ma, K., Yan, J., Deng, D., Wang, Z.: Blind image quality assessment using a deep bilinear convolutional neural network. IEEE Trans. Circuits Syst. Video Technol. 30(1), 36\u201347 (2020). https:\/\/doi.org\/10.1109\/TCSVT.2018.2886771","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10_CR58","doi-asserted-by":"crossref","unstructured":"Zhang, W., Zhai, G., Wei, Y., Yang, X., Ma, K.: Blind image quality assessment via vision-language correspondence: a multitask learning perspective. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14071\u201314081 (2023)","DOI":"10.1109\/CVPR52729.2023.01352"},{"key":"10_CR59","doi-asserted-by":"crossref","unstructured":"Zhou, X., Zhang, L., Zhao, X., Wang, K., Li, L., Gu, S.: Video super-resolution transformer with masked inter &intra-frame attention (2024). https:\/\/openreview.net\/forum?id=ZGBOfAQrMl","DOI":"10.1109\/CVPR52733.2024.02400"},{"key":"10_CR60","unstructured":"Zhu, L., Liao, B., Zhang, Q., Wang, X., Liu, W., Wang, X.: Vision mamba: Efficient visual representation learning with bidirectional state space model. arXiv preprint arXiv:2401.09417 (2024)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91856-8_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T11:18:12Z","timestamp":1747999092000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91856-8_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031918551","9783031918568"],"references-count":60,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91856-8_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}