{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T09:50:37Z","timestamp":1758361837747,"version":"3.44.0"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T00:00:00Z","timestamp":1740441600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T00:00:00Z","timestamp":1740441600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62306310"],"award-info":[{"award-number":["62306310"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s10489-025-06382-9","type":"journal-article","created":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T00:00:20Z","timestamp":1740441620000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Instructed fine-tuning based on semantic consistency constraint for deep multi-view stereo"],"prefix":"10.1007","volume":"55","author":[{"given":"Yan","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4296-2289","authenticated-orcid":false,"given":"Hongping","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kun","family":"Ding","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tingting","family":"Cai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yueyue","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,25]]},"reference":[{"key":"6382_CR1","doi-asserted-by":"crossref","unstructured":"Xu Q, Tao W (2019) Multi-scale geometric consistency guided multi-view stereo. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5483\u20135492","DOI":"10.1109\/CVPR.2019.00563"},{"key":"6382_CR2","doi-asserted-by":"crossref","unstructured":"Xu Q, Kong W, Tao W, Pollefeys M (2022) Multi-scale geometric consistency guided and planar prior assisted multi-view stereo. IEEE Trans Pattern Anal Mach Intell","DOI":"10.1109\/TPAMI.2022.3200074"},{"key":"6382_CR3","doi-asserted-by":"crossref","unstructured":"Wang Y, Zeng Z, Guan T, Yang W, Chen Z, Liu W, Xu L, Luo Y (2023) Adaptive patch deformation for textureless-resilient multi-view stereo. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1621\u20131630","DOI":"10.1109\/CVPR52729.2023.00162"},{"key":"6382_CR4","doi-asserted-by":"publisher","first-page":"6289","DOI":"10.1109\/TIP.2023.3331309","volume":"32","author":"H Liu","year":"2023","unstructured":"Liu H, Zhang C, Deng Y, Liu T, Zhang Z, Li Y-F (2023) Orientation cues-aware facial relationship representation for head pose estimation via transformer. IEEE Trans Image Process 32:6289\u20136302","journal-title":"IEEE Trans Image Process"},{"key":"6382_CR5","unstructured":"Liu H, Zhang C, Deng Y, Xie B, Liu T, Li Y-F (2023) Transifc: Invariant cues-aware feature concentration learning for efficient fine-grained bird image classification. IEEE Trans Multimed"},{"key":"6382_CR6","doi-asserted-by":"crossref","unstructured":"Liu H, Zhou Q, Zhang C, Zhu J, Liu T, Zhang Z, Li Y-F (2024) Mmatrans: Muscle movement aware representation learning for facial expression recognition via transformers. IEEE Trans Ind Inform","DOI":"10.1109\/TII.2024.3431640"},{"key":"6382_CR7","doi-asserted-by":"crossref","unstructured":"Yao Y, Luo Z, Li S, Fang T, Quan L (2018) Mvsnet: Depth inference for unstructured multi-view stereo. In: Proceedings of the European conference on computer vision (ECCV), pp 767\u2013783","DOI":"10.1007\/978-3-030-01237-3_47"},{"key":"6382_CR8","doi-asserted-by":"crossref","unstructured":"Wang F, Galliani S, Vogel C, Speciale P, Pollefeys M (2021) Patchmatchnet: Learned multi-view patchmatch stereo. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14194\u201314203","DOI":"10.1109\/CVPR46437.2021.01397"},{"key":"6382_CR9","doi-asserted-by":"crossref","unstructured":"Wang F, Galliani S, Vogel C, Pollefeys M (2022) Itermvs: Iterative probability estimation for efficient multi-view stereo. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8606\u20138615","DOI":"10.1109\/CVPR52688.2022.00841"},{"key":"6382_CR10","doi-asserted-by":"crossref","unstructured":"Mi Z, Di C, Xu D (2022) Generalized binary search network for highly-efficient multi-view stereo. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12991\u201313000","DOI":"10.1109\/CVPR52688.2022.01265"},{"key":"6382_CR11","doi-asserted-by":"crossref","unstructured":"Su W, Tao W (2023) Efficient edge-preserving multi-view stereo network for depth estimation. In: Proceedings of the AAAI conference on artificial intelligence, vol 37, pp 2348\u20132356","DOI":"10.1609\/aaai.v37i2.25330"},{"key":"6382_CR12","doi-asserted-by":"crossref","unstructured":"Wu J, Li R, Xu H, Zhao W, Zhu Y, Sun J, Zhang Y (2024) Gomvs: Geometrically consistent cost aggregation for multi-view stereo. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 20207\u201320216","DOI":"10.1109\/CVPR52733.2024.01910"},{"key":"6382_CR13","unstructured":"Cao C, Ren X, Fu Y (2024) Mvsformer++: Revealing the devil in transformer\u2019s details for multi-view stereo. In: The Twelfth international conference on learning representations"},{"issue":"21","key":"6382_CR14","doi-asserted-by":"publisher","first-page":"26230","DOI":"10.1007\/s10489-023-04910-z","volume":"53","author":"J Chen","year":"2023","unstructured":"Chen J, Yu Z, Ma L, Zhang K (2023) Uncertainty awareness with adaptive propagation for multi-view stereo. Appl Intell 53(21):26230\u201326239","journal-title":"Appl Intell"},{"key":"6382_CR15","doi-asserted-by":"crossref","unstructured":"Liu T, Ye X, Zhao W, Pan Z, Shi M, Cao Z (2023) When epipolar constraint meets non-local operators in multi-view stereo. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 18088\u201318097","DOI":"10.1109\/ICCV51070.2023.01658"},{"issue":"8","key":"6382_CR16","doi-asserted-by":"publisher","first-page":"10905","DOI":"10.1007\/s11063-023-11356-4","volume":"55","author":"S Xu","year":"2023","unstructured":"Xu S, Xu Q, Su W, Tao W (2023) Edge-aware spatial propagation network for multi-view depth estimation. Neural Process Lett 55(8):10905\u201310923","journal-title":"Neural Process Lett"},{"key":"6382_CR17","doi-asserted-by":"crossref","unstructured":"Zhang J, Wang X, Bai X, Wang C, Huang L, Chen Y, Gu L, Zhou J, Harada T, Hancock ER (2022) Revisiting domain generalized stereo matching networks from a feature consistency perspective. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13001\u201313011","DOI":"10.1109\/CVPR52688.2022.01266"},{"key":"6382_CR18","doi-asserted-by":"crossref","unstructured":"Xu H, Zhou Z, Qiao Y, Kang W, Wu Q (2021) Self-supervised multi-view stereo via effective co-segmentation and data-augmentation. In: Proceedings of the AAAI conference on artificial intelligence, vol 35, pp 3030\u20133038","DOI":"10.1609\/aaai.v35i4.16411"},{"issue":"6","key":"6382_CR19","doi-asserted-by":"publisher","first-page":"1336","DOI":"10.1109\/TKDE.2012.51","volume":"25","author":"Y-X Wang","year":"2012","unstructured":"Wang Y-X, Zhang Y-J (2012) Nonnegative matrix factorization: A comprehensive review. IEEE Trans Knowl Data Eng 25(6):1336\u20131353","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"6382_CR20","doi-asserted-by":"crossref","unstructured":"Yuan Z, Cao J, Li Z, Jiang H, Wang Z (2024) Sd-mvs: Segmentation-driven deformation multi-view stereo with spherical refinement and em optimization. In: Proceedings of the AAAI conference on artificial intelligence, vol 38, pp 6871\u20136880","DOI":"10.1609\/aaai.v38i7.28512"},{"key":"6382_CR21","doi-asserted-by":"crossref","unstructured":"Kirillov A, Mintun E, Ravi N, Mao H, Rolland C, Gustafson L, Xiao T, Whitehead S, Berg AC, Lo W-Y et al (2023) Segment anything. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 4015\u20134026","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"6382_CR22","unstructured":"Ren T, Liu S, Zeng A, Lin J, Li K, Cao H, Chen J, Huang X, Chen Y, Yan F et al (2024) Grounded sam: Assembling open-world models for diverse visual tasks. arXiv preprint arXiv:2401.14159"},{"key":"6382_CR23","doi-asserted-by":"crossref","unstructured":"Ulusoy AO, Black MJ, Geiger A (2017) Semantic multi-view stereo: Jointly estimating objects and voxels. In: 2017 IEEE Conference on computer vision and pattern recognition (CVPR), IEEE, pp 4531\u20134540","DOI":"10.1109\/CVPR.2017.482"},{"issue":"4","key":"6382_CR24","doi-asserted-by":"publisher","first-page":"389","DOI":"10.4310\/CIS.2020.v20.n4.a1","volume":"20","author":"Y Jin","year":"2020","unstructured":"Jin Y, Jiang D, Cai M (2020) 3d reconstruction using deep learning: a survey. Commun Inf Syst 20(4):389\u2013413","journal-title":"Commun Inf Syst"},{"key":"6382_CR25","doi-asserted-by":"crossref","unstructured":"Schonberger JL, Frahm J-M (2016) Structure-from-motion revisited. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4104\u20134113","DOI":"10.1109\/CVPR.2016.445"},{"key":"6382_CR26","doi-asserted-by":"crossref","unstructured":"Xu Q, Tao W (2019) Multi-scale geometric consistency guided multi-view stereo. Comput Vis Pattern Recognit (CVPR)","DOI":"10.1109\/CVPR.2019.00563"},{"key":"6382_CR27","doi-asserted-by":"crossref","unstructured":"Romanoni A, Matteucci M (2019) Tapa-mvs: Textureless-aware patchmatch multi-view stereo. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10413\u201310422","DOI":"10.1109\/ICCV.2019.01051"},{"key":"6382_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110565","volume":"154","author":"Z Yuan","year":"2024","unstructured":"Yuan Z, Cao J, Wang Z, Li Z (2024) Tsar-mvs: Textureless-aware segmentation and correlative refinement guided multi-view stereo. Pattern Recogn 154:110565","journal-title":"Pattern Recogn"},{"key":"6382_CR29","doi-asserted-by":"crossref","unstructured":"Liu T, Liu H, Yang B, Zhang Z (2023) Ldcnet: limb direction cues-aware network for flexible human pose estimation in industrial behavioral biometrics systems. IEEE Trans Ind Inform","DOI":"10.1109\/TII.2023.3266366"},{"key":"6382_CR30","unstructured":"Liu H, Liu T, Chen Y, Zhang Z, Li Y-F (2022) Ehpe: Skeleton cues-based gaussian coordinate encoding for efficient human pose estimation. IEEE Trans Multimed"},{"issue":"10","key":"6382_CR31","doi-asserted-by":"publisher","first-page":"7107","DOI":"10.1109\/TII.2022.3143605","volume":"18","author":"H Liu","year":"2022","unstructured":"Liu H, Liu T, Zhang Z, Sangaiah AK, Yang B, Li Y (2022) Arhpe: Asymmetric relation-aware representation learning for head pose estimation in industrial human-computer interaction. IEEE Trans Industr Inf 18(10):7107\u20137117","journal-title":"IEEE Trans Industr Inf"},{"key":"6382_CR32","doi-asserted-by":"crossref","unstructured":"Li H, Guo Y, Zheng X, Xiong H (2024) Learning deformable hypothesis sampling for accurate patchmatch multi-view stereo. In: Proceedings of the AAAI conference on artificial intelligence, vol 38, pp 3082\u20133090","DOI":"10.1609\/aaai.v38i4.28091"},{"issue":"183","key":"6382_CR33","doi-asserted-by":"publisher","first-page":"430","DOI":"10.1111\/phor.12459","volume":"38","author":"H Hu","year":"2023","unstructured":"Hu H, Su L, Mao S, Chen M, Pan G, Xu B, Zhu Q (2023) Adaptive region aggregation for multi-view stereo matching using deformable convolutional networks. Photogram Rec 38(183):430\u2013449","journal-title":"Photogram Rec"},{"key":"6382_CR34","doi-asserted-by":"crossref","unstructured":"Wang X, Zhu Z, Huang G, Qin F, Ye Y, He Y, Chi X, Wang X (2022) Mvster: Epipolar transformer for efficient multi-view stereo. In: European conference on computer vision, Springer, pp 573\u2013591","DOI":"10.1007\/978-3-031-19821-2_33"},{"key":"6382_CR35","doi-asserted-by":"crossref","unstructured":"Chen W, Xu H, Zhou Z, Liu Y, Sun B, Kang W, Xie X (2023) Costformer: cost transformer for cost aggregation in multi-view stereo. In: Proceedings of the thirty-second international joint conference on artificial intelligence, pp 599\u2013608","DOI":"10.24963\/ijcai.2023\/67"},{"key":"6382_CR36","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inform Process Syst 30"},{"key":"6382_CR37","doi-asserted-by":"crossref","unstructured":"Ding Y, Yuan W, Zhu Q, Zhang H, Liu X, Wang Y, Liu X (2022) Transmvsnet: Global context-aware multi-view stereo network with transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8585\u20138594","DOI":"10.1109\/CVPR52688.2022.00839"},{"key":"6382_CR38","unstructured":"Zhao X, Ding W, An Y, Du Y, Yu T, Li M, Tang M, Wang J (2023) Fast segment anything. arXiv preprint arXiv:2306.12156"},{"key":"6382_CR39","unstructured":"Zhang C, Han D, Qiao Y, Kim JU, Bae S-H, Lee S, Hong CS (2023) Faster segment anything: Towards lightweight sam for mobile applications. arXiv preprint arXiv:2306.14289"},{"key":"6382_CR40","unstructured":"Ke L, Ye M, Danelljan M, Tai Y-W, Tang C-K, Yu F et al (2024) Segment anything in high quality. Adv Neural Inform Process Syst 36"},{"key":"6382_CR41","doi-asserted-by":"crossref","unstructured":"Liu S, Zeng Z, Ren T, Li F, Zhang H, Yang J, Jiang Q, Li C, Yang J, Su H et al (2025) Grounding dino: Marrying dino with grounded pre-training for open-set object detection. In: European conference on computer vision, Springer, pp 38\u201355","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"6382_CR42","doi-asserted-by":"crossref","unstructured":"Zhang Y, Huang K, Chen C, Chen Q, Heng P-A (2023) Satta: Semantic-aware test-time adaptation for cross-domain medical image segmentation. In: International conference on medical image computing and computer-assisted intervention, pp 148\u2013158","DOI":"10.1007\/978-3-031-43895-0_14"},{"key":"6382_CR43","doi-asserted-by":"publisher","unstructured":"Enomoto S, Hasegawa N, Adachi K, Sasaki T, Yamaguchi S, Suzuki S, Eda T (2024) Test-time adaptation meets image enhancement: Improving accuracy via uncertainty-aware logit switching. In: 2024 International joint conference on neural networks (IJCNN), pp 1\u20138. https:\/\/doi.org\/10.1109\/IJCNN60899.2024.10650964","DOI":"10.1109\/IJCNN60899.2024.10650964"},{"key":"6382_CR44","doi-asserted-by":"crossref","unstructured":"Nam H, Jung DS, Oh Y, Lee KM (2023) Cyclic test-time adaptation on monocular video for 3d human mesh reconstruction. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 14829\u201314839","DOI":"10.1109\/ICCV51070.2023.01362"},{"key":"6382_CR45","unstructured":"Kenton JDM-WC, Toutanova LK (2019) Bert: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of naacL-HLT, Minneapolis, Minnesota, vol 1, p 2"},{"key":"6382_CR46","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"6382_CR47","doi-asserted-by":"crossref","unstructured":"Aans, Henrik, Jensen, Rasmus, Vogiatzis, George, Tola, Engin, Dahl, Anders (2016) Large-scale data for multiple-view stereopsis. Int J ofuter Vis 120(2):153\u2013168","DOI":"10.1007\/s11263-016-0902-9"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06382-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-025-06382-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06382-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T19:37:16Z","timestamp":1758310636000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-025-06382-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,25]]},"references-count":47,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["6382"],"URL":"https:\/\/doi.org\/10.1007\/s10489-025-06382-9","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2025,2,25]]},"assertion":[{"value":"14 February 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 February 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no Conflict of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"473"}}