{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,15]],"date-time":"2026-05-15T15:13:09Z","timestamp":1778857989373,"version":"3.51.4"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T00:00:00Z","timestamp":1776729600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T00:00:00Z","timestamp":1776729600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1007\/s00138-026-01820-1","type":"journal-article","created":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T17:44:34Z","timestamp":1776793474000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Local feature extraction method based on feature aggregation, mixed convolution and attention"],"prefix":"10.1007","volume":"37","author":[{"given":"Lunming","family":"Qin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mengqian","family":"Quan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haoyang","family":"Cui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liang","family":"Xue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Houqin","family":"Bian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xi","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,4,21]]},"reference":[{"issue":"2","key":"1820_CR1","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D.G.: Distinctive image features from scale-invariant keypoints. Int. J. Comput. Vis. 60(2), 91\u2013110 (2004). https:\/\/doi.org\/10.1023\/B:VISI.0000029664.99615.94","journal-title":"Int. J. Comput. Vis."},{"key":"1820_CR2","doi-asserted-by":"publisher","unstructured":"Yi, K.M., Trulls, E., Lepetit, V., Fua, P.: LIFT: learned invariant feature transform. In: Proceedings of European Conference on Computer Vision (ECCV), pp. 467\u2013483. Switzerland, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46466-4_28","DOI":"10.1007\/978-3-319-46466-4_28"},{"key":"1820_CR3","doi-asserted-by":"publisher","unstructured":"Tian, Y., Fan, B., Wu, F.: L2-Net: deep learning of discriminative patch descriptor in Euclidean space. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 661\u2013669. Honolulu, USA (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.649","DOI":"10.1109\/CVPR.2017.649"},{"key":"1820_CR4","unstructured":"Tyszkiewicz, M., Fua, P., Trulls, E.: Disk: learning local features with policy gradient. In: Proceedings of the 34th Conference on Neural Information Processing Systems (NeurIPS), pp. 14254\u201314265 (2020)"},{"key":"1820_CR5","doi-asserted-by":"publisher","first-page":"3101","DOI":"10.1109\/TMM.2022.3155927","volume":"25","author":"XM Zhao","year":"2023","unstructured":"Zhao, X.M., Wu, X.M., Miao, J.Y., Liu, Y., Chen, Y.: ALIKE: accurate and lightweight keypoint detection and descriptor extraction. IEEE Trans. Multimed. 25, 3101\u20133112 (2023). https:\/\/doi.org\/10.1109\/TMM.2022.3155927","journal-title":"IEEE Trans. Multimed."},{"key":"1820_CR6","doi-asserted-by":"publisher","unstructured":"Tian, Y., Balntas, V., Ng, T., Barroso-Laguna, A., Demiris, Y., Mikolajczyk, K.: D2D: Keypoint extraction with describe to detect approach. In: Proceedings of Asian Conference on Computer Vision (ACCV), pp. 123\u2013139. Taipei (2020). https:\/\/doi.org\/10.1007\/978-3-030-69535-4_14","DOI":"10.1007\/978-3-030-69535-4_14"},{"key":"1820_CR7","doi-asserted-by":"publisher","first-page":"591","DOI":"10.1109\/TIP.2022.3231135","volume":"32","author":"Y Deng","year":"2023","unstructured":"Deng, Y., Ma, J.: Redfeat: recoupling detection and description for multimodal feature learning. IEEE Trans. Image Process. 32, 591\u2013602 (2023). https:\/\/doi.org\/10.1109\/TIP.2022.3231135","journal-title":"IEEE Trans. Image Process."},{"key":"1820_CR8","doi-asserted-by":"publisher","unstructured":"Li, K., Wang, L., Liu, L., Ran, Q., Xu, K., Guo, Y.: Decoupling makes weakly supervised local feature better. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 15838\u201315848. New Orleans (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01538","DOI":"10.1109\/CVPR52688.2022.01538"},{"key":"1820_CR9","doi-asserted-by":"publisher","unstructured":"DeTone, D., Malisiewicz, T., Rabinovich, A.: SuperPoint: self-supervised interest point detection and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 224\u2013236. Salt Lake City (2018). https:\/\/doi.org\/10.1109\/CVPRW.2018.00060","DOI":"10.1109\/CVPRW.2018.00060"},{"key":"1820_CR10","doi-asserted-by":"publisher","unstructured":"Dusmanu, M., Rocco, I., Pajdla, T., Pollefeys, M., Sivic, J., Larhus, D., et al.: D2-Net: a trainable CNN for joint description and detection of local features. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition 2025, pp. 8092\u20138101. Long Beach (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00828","DOI":"10.1109\/CVPR.2019.00828"},{"key":"1820_CR11","unstructured":"Revaud, J., Weinzaepfel, P., De\u00a0Souza, C., al.: R2d2: reliable and repeatable detector and descriptor. In: Proceedings of the 33rd Conference on Neural Information Processing Systems (NeurIPS), pp. 12405\u201312415. Curran Associates, Vancouver (2019)"},{"key":"1820_CR12","doi-asserted-by":"publisher","unstructured":"Gleize, P., Wang, W.Y., Feiszli, M.: SiLK: simple learned keypoints. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 22442\u201322451. Washington (2023). https:\/\/doi.org\/10.1109\/ICCV51070.2023.02056","DOI":"10.1109\/ICCV51070.2023.02056"},{"key":"1820_CR13","doi-asserted-by":"publisher","unstructured":"Potje, G., Cadar, F., Araujo, A., Martins, R., Nascimento, E.R.: XFeat: accelerated features for lightweight image matching. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2682\u20132691. Seattle (2024). https:\/\/doi.org\/10.1109\/CVPR52733.2024.00259","DOI":"10.1109\/CVPR52733.2024.00259"},{"issue":"9","key":"1820_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.isci.2024.110825","volume":"27","author":"Y Guo","year":"2024","unstructured":"Guo, Y., Li, W., Zhai, P., Wu, L.: Feature matching based on local windows aggregation. iScience 27(9), 110825 (2024). https:\/\/doi.org\/10.1016\/j.isci.2024.110825","journal-title":"iScience"},{"key":"1820_CR15","doi-asserted-by":"publisher","unstructured":"Sun, J., Shen, Z., Wang, Y., Bao, H., Zhou, X.: Loftr: Detector-free local feature matching with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8922\u20138931 (2021). https:\/\/doi.org\/10.1109\/APSIPAASC63619.2025.10848757","DOI":"10.1109\/APSIPAASC63619.2025.10848757"},{"key":"1820_CR16","doi-asserted-by":"publisher","unstructured":"Liu, W., Lu, H., Fu, H., Cao, Z.: Learning to upsample by learning to sample. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 6004\u20136014. Paris (2023). https:\/\/doi.org\/10.1109\/ICCV51070.2023.00554","DOI":"10.1109\/ICCV51070.2023.00554"},{"issue":"3","key":"1820_CR17","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1016\/j.cviu.2007.09.014","volume":"110","author":"H Bay","year":"2008","unstructured":"Bay, H., Ess, A., Tuytelaars, T., Van Gool, L.: Speeded-up robust features (surf). Comput. Vis. Image Understand. 110(3), 346\u2013359 (2008). https:\/\/doi.org\/10.1016\/j.cviu.2007.09.014","journal-title":"Comput. Vis. Image Understand."},{"key":"1820_CR18","doi-asserted-by":"publisher","unstructured":"Rublee, E., Rabaud, V., Konolige, K., Bradski, G.: ORB: an efficient alternative to SIFT or SURF. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 2564\u20132571. Barcelona (2011). https:\/\/doi.org\/10.1109\/ICCV.2011.6126544","DOI":"10.1109\/ICCV.2011.6126544"},{"key":"1820_CR19","doi-asserted-by":"publisher","unstructured":"Rosten, E., Drummond, T.: Machine learning for high-speed corner detection. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 430\u2013443. Heidelberg (2006). https:\/\/doi.org\/10.1007\/11744023_34","DOI":"10.1007\/11744023_34"},{"key":"1820_CR20","unstructured":"Wang, W., Han, C., Zhou, T., Liu, D.: Visual recognition with deep nearest centroids. In: The Eleventh International Conference on Learning Representations (2023). https:\/\/openreview.net\/forum?id=CsKwavjr7A"},{"issue":"2","key":"1820_CR21","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1109\/TPAMI.2015.2439281","volume":"38","author":"C Dong","year":"2015","unstructured":"Dong, C., Loy, C.C., He, K., Tang, X.: Image super-resolution using deep convolutional networks. IEEE Trans. Pattern Anal. Mach. Intell. 38(2), 295\u2013307 (2015). https:\/\/doi.org\/10.1109\/TPAMI.2015.2439281","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1820_CR22","doi-asserted-by":"publisher","unstructured":"Kim, J., Lee, J.K., Lee, K.M.: Accurate image super-resolution using very deep convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1646\u20131654 (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.182","DOI":"10.1109\/CVPR.2016.182"},{"key":"1820_CR23","doi-asserted-by":"publisher","unstructured":"Lim, B., Son, S., Kim, H., Nah, S., Mu\u00a0Lee, K.: Enhanced deep residual networks for single image super-resolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 136\u2013144 (2017). https:\/\/doi.org\/10.1109\/CVPRW.2017.151","DOI":"10.1109\/CVPRW.2017.151"},{"key":"1820_CR24","doi-asserted-by":"publisher","unstructured":"Zhang, Y., Li, K., Li, K., Wang, L., Zhong, B., Fu, Y.: Image super-resolution using very deep residual channel attention networks. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 286\u2013301 (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_18","DOI":"10.1007\/978-3-030-01234-2_18"},{"key":"1820_CR25","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351084","author":"Z Hui","year":"2019","unstructured":"Hui, Z., Gao, X., Yang, Y., Wang, X.: Lightweight image super-resolution with information multi-distillation network. ACM (2019). https:\/\/doi.org\/10.1145\/3343031.3351084","journal-title":"ACM"},{"key":"1820_CR26","doi-asserted-by":"publisher","unstructured":"Li, Z., Liu, Y., Chen, X., Cai, H., Gu, J., Qiao, Y., Dong, C.: Blueprint separable residual network for efficient image super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 833\u2013843 (2022). https:\/\/doi.org\/10.1109\/CVPRW56347.2022.00099","DOI":"10.1109\/CVPRW56347.2022.00099"},{"key":"1820_CR27","doi-asserted-by":"publisher","unstructured":"Mao, Y., Zhang, N., Wang, Q., Bai, B., Bai, W., Fang, H., Liu, P., Li, M., Yan, S.: Multi-level dispersion residual network for efficient image super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1660\u20131669 (2023). https:\/\/doi.org\/10.1109\/CVPRW59228.2023.00167","DOI":"10.1109\/CVPRW59228.2023.00167"},{"key":"1820_CR28","doi-asserted-by":"publisher","unstructured":"Sun, L., Dong, J., Tang, J., Pan, J.: Spatially-adaptive feature modulation for efficient image super-resolution. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13190\u201313199 (2023). https:\/\/doi.org\/10.1109\/ICCV51070.2023.01213","DOI":"10.1109\/ICCV51070.2023.01213"},{"key":"1820_CR29","doi-asserted-by":"publisher","unstructured":"Zhou, L., Cai, H., Gu, J., Li, Z., Liu, Y., Chen, X., Qiao, Y., Dong, C.: Efficient image super-resolution using vast-receptive-field attention. In: European Conference on Computer Vision, pp. 256\u2013272. Springer (2022). https:\/\/doi.org\/10.1088\/1361-6501\/ad73ed","DOI":"10.1088\/1361-6501\/ad73ed"},{"key":"1820_CR30","doi-asserted-by":"publisher","unstructured":"Liu, D., Cui, Y., Yan, L., Mousas, C., Yang, B., Chen, Y.: Densernet: Weakly supervised visual localization using multi-scale feature aggregation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 6101\u20136109 (2021). https:\/\/doi.org\/10.1609\/aaai.v35i7.16760","DOI":"10.1609\/aaai.v35i7.16760"},{"key":"1820_CR31","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: BERT: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Vol. 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis, Minnesota (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"1820_CR32","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems 30 (2017)"},{"key":"1820_CR33","doi-asserted-by":"publisher","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229. Springer (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1820_CR34","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1820_CR35","unstructured":"Ramachandran, P., Parmar, N., Vaswani, A., Bello, I., Levskaya, A., Shlens, J.: Stand-alone self-attention in vision models. Advances in neural information processing systems 32 (2019)"},{"key":"1820_CR36","doi-asserted-by":"publisher","unstructured":"Zhao, H., Jia, J., Koltun, V.: Exploring self-attention for image recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10076\u201310085 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.01009","DOI":"10.1109\/CVPR42600.2020.01009"},{"key":"1820_CR37","doi-asserted-by":"publisher","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018). https:\/\/doi.org\/10.1109\/TPAMI.2019.2913372","DOI":"10.1109\/TPAMI.2019.2913372"},{"key":"1820_CR38","doi-asserted-by":"publisher","unstructured":"Woo, S., Park, J., Lee, J.-Y., Kweon, I.S.: Cbam: Convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_1","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1820_CR39","doi-asserted-by":"publisher","unstructured":"Hu, H., Zhang, Z., Xie, Z., Lin, S.: Local relation networks for image recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3464\u20133473 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00356","DOI":"10.1109\/ICCV.2019.00356"},{"key":"1820_CR40","doi-asserted-by":"publisher","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7794\u20137803 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00813","DOI":"10.1109\/CVPR.2018.00813"},{"key":"1820_CR41","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2021.103224","volume":"208","author":"C Hong","year":"2021","unstructured":"Hong, C., Chen, L., Liang, Y., Zeng, Z.: Stacked capsule graph autoencoders for geometry-aware 3d head pose estimation. Comput. Vis. Image Underst. 208, 103224 (2021). https:\/\/doi.org\/10.1016\/j.cviu.2021.103224","journal-title":"Comput. Vis. Image Underst."},{"issue":"1","key":"1820_CR42","doi-asserted-by":"publisher","first-page":"599","DOI":"10.1007\/s13042-024-02262-9","volume":"16","author":"Y Xie","year":"2025","unstructured":"Xie, Y., Hong, C., Zhuang, W., Liu, L., Li, J.: Hogformer: high-order graph convolution transformer for 3d human pose estimation. Int. J. Mach. Learn. Cybern. 16(1), 599\u2013610 (2025). https:\/\/doi.org\/10.1007\/s13042-024-02262-9","journal-title":"Int. J. Mach. Learn. Cybern."},{"issue":"11","key":"1820_CR43","doi-asserted-by":"publisher","first-page":"9413","DOI":"10.1007\/s13042-025-02761-3","volume":"16","author":"X Lee","year":"2025","unstructured":"Lee, X., Hong, C., Zhang, X., Chen, Y.: Droformer: Temporal action detection with drop mechanism of attention. Int. J. Mach. Learn. Cybern. 16(11), 9413\u20139428 (2025). https:\/\/doi.org\/10.1007\/s13042-025-02761-3","journal-title":"Int. J. Mach. Learn. Cybern."},{"key":"1820_CR44","doi-asserted-by":"publisher","unstructured":"Peng, Z., Huang, W., Gu, S., Xie, L., Wang, Y., Jiao, J., Ye, Q.: Conformer: Local features coupling global representations for visual recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 367\u2013376 (2021). https:\/\/doi.org\/10.1109\/TPAMI.2023.3243048","DOI":"10.1109\/TPAMI.2023.3243048"},{"key":"1820_CR45","unstructured":"Liang, J.C., Zhou, T., Liu, D., Wang, W.: Clustseg: Clustering for universal segmentation. In: Proceedings of the 40th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 202, pp. 20787\u201320809. PMLR, USA (2023). https:\/\/proceedings.mlr.press\/v202\/liang23h.html"},{"key":"1820_CR46","doi-asserted-by":"publisher","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., Wei, Y.: Deformable convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 764\u2013773 (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.89","DOI":"10.1109\/ICCV.2017.89"},{"key":"1820_CR47","doi-asserted-by":"crossref","unstructured":"Zhu, X., Hu, H., Lin, S., Dai, J.: Deformable convnets v2: More deformable, better results. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9308\u20139316 (2019)","DOI":"10.1109\/CVPR.2019.00953"},{"key":"1820_CR48","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)"},{"key":"1820_CR49","doi-asserted-by":"publisher","unstructured":"Zhang, X., Chen, Q., Ng, R., Koltun, V.: Zoom to learn, learn to zoom. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3762\u20133770 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00388","DOI":"10.1109\/CVPR.2019.00388"},{"key":"1820_CR50","unstructured":"Jin, C., Tanno, R., Mertzanidou, T., Panagiotaki, E., Alexander, D.C.: Learning to downsample for segmentation of ultra-high resolution images. In: International Conference on Learning Representations (2022)"},{"key":"1820_CR51","doi-asserted-by":"publisher","unstructured":"Zheng, M., Sun, L., Dong, J., Liu, X., Ma, Z.: SMFANet: A lightweight self-modulation feature aggregation network for efficient image super-resolution. In: Proceedings of The European Conference on Computer Vision (ECCV), pp. 317\u2013332. Cham (2024). https:\/\/doi.org\/10.1007\/978-3-031-72973-7_21","DOI":"10.1007\/978-3-031-72973-7_21"},{"key":"1820_CR52","doi-asserted-by":"publisher","unstructured":"Pan, X.R., Ge, C.J., Lu, R., Song, Y., Chen, G., Huang, Z., et al.: On the integration of self-attention and convolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), New Orleans, pp. 805\u2013815 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.00089","DOI":"10.1109\/CVPR52688.2022.00089"},{"key":"1820_CR53","doi-asserted-by":"publisher","unstructured":"Li, Z., Snavely, N.: MegaDepth: Learning single-view depth prediction from internet photos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2041\u20132050. Salt Lake City (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00218","DOI":"10.1109\/CVPR.2018.00218"},{"key":"1820_CR54","doi-asserted-by":"publisher","unstructured":"Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., et al.: Microsoft COCO: Common objects in context. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 740\u2013755. Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1820_CR55","doi-asserted-by":"publisher","unstructured":"Dai, A., Chang, A.X., Savva, M., Halber, M., Funkhouser, T., et al.: ScanNet: richly-annotated 3D reconstructions of indoor scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5828\u20135839. Honolulu (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.261","DOI":"10.1109\/CVPR.2017.261"},{"key":"1820_CR56","doi-asserted-by":"publisher","unstructured":"Balntas, V., Lenc, K., Vedaldi, A., Mikolajczyk, K.: HPatches: A benchmark and evaluation of handcrafted and learned local descriptors. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5173\u20135182. Honolulu (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.410","DOI":"10.1109\/CVPR.2017.410"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-026-01820-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-026-01820-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-026-01820-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,15]],"date-time":"2026-05-15T14:31:16Z","timestamp":1778855476000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-026-01820-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,21]]},"references-count":56,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,5]]}},"alternative-id":["1820"],"URL":"https:\/\/doi.org\/10.1007\/s00138-026-01820-1","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,4,21]]},"assertion":[{"value":"23 November 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 March 2026","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 April 2026","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 April 2026","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"For this type of study, formal consent was not required, and this manuscript does not contain any studies with human participants or animals performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval and consent to participate"}},{"value":"All authors consent to publication.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}],"article-number":"62"}}