{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T21:47:59Z","timestamp":1780955279885,"version":"3.54.1"},"reference-count":45,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100004826","name":"Natural Science Foundation of Beijing Municipality","doi-asserted-by":"publisher","award":["8212023"],"award-info":[{"award-number":["8212023"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["42071445"],"award-info":[{"award-number":["42071445"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1016\/j.engappai.2023.107800","type":"journal-article","created":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T05:14:21Z","timestamp":1704086061000},"page":"107800","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":34,"special_numbering":"C","title":["SA-MVSNet: Self-attention-based multi-view stereo network for 3D reconstruction of images with weak texture"],"prefix":"10.1016","volume":"131","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3394-090X","authenticated-orcid":false,"given":"Ronghao","family":"Yang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wang","family":"Miao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhenxin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4848-5850","authenticated-orcid":false,"given":"Zhenlong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9425-8974","authenticated-orcid":false,"given":"Mubai","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bin","family":"Lin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2023.107800_bib1","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1007\/s11263-016-0902-9","article-title":"Large-scale data for multiple-view stereopsis","volume":"120","author":"Aan\u00e6s","year":"2016","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.engappai.2023.107800_bib2","author":"Cao"},{"key":"10.1016\/j.engappai.2023.107800_bib4","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"1538","article-title":"Point-based multi-view stereo network","author":"Chen","year":"2019"},{"key":"10.1016\/j.engappai.2023.107800_bib5","series-title":"Proceedings of the CVPR IEEE Computer Society Conference on Computer Vision and Pattern Recognition","first-page":"2521","article-title":"Deep stereo using adaptive thin volume representation with uncertainty awareness","author":"Cheng","year":"2020"},{"key":"10.1016\/j.engappai.2023.107800_bib6","series-title":"Proceedings of the CVPR IEEE Computer Society Conference on Computer Vision and Pattern Recognition","first-page":"358","article-title":"A space-sweep approach to true multi-image matching","author":"Collins","year":"1996"},{"key":"10.1016\/j.engappai.2023.107800_bib7","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"764","article-title":"Deformable convolutional networks","author":"Dai","year":"2017"},{"key":"10.1016\/j.engappai.2023.107800_bib8","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","volume":"vol. 1","author":"Devlin","year":"2019"},{"key":"10.1016\/j.engappai.2023.107800_bib9","series-title":"Proceedings of the 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"8575","article-title":"TransMVSNet: global context-aware multi-view stereo network with transformers","author":"Ding","year":"2021"},{"key":"10.1016\/j.engappai.2023.107800_bib11","series-title":"An Image Is Worth 16x16 Words: Transformers for Image Recognition at Scale","author":"Dosovitskiy","year":"2020"},{"key":"10.1016\/j.engappai.2023.107800_bib12","series-title":"Proceedings of the 2019 IEEE\/CVF International Conference on Computer Vision","first-page":"4383","article-title":"Deeppruner: learning efficient stereo matching via differentiable patchmatch","author":"Duggal","year":"2019"},{"issue":"1\u20132","key":"10.1016\/j.engappai.2023.107800_bib13","first-page":"1","article-title":"Multi-view stereo: a tutorial","volume":"9","author":"Furukawa","year":"2015","journal-title":"Found. Trends\u00ae Comput. Graph. Vis."},{"key":"10.1016\/j.engappai.2023.107800_bib14","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"873","article-title":"Massively parallel multiview stereopsis by surface normal diffusion","author":"Galliani","year":"2015"},{"key":"10.1016\/j.engappai.2023.107800_bib15","doi-asserted-by":"crossref","first-page":"2861","DOI":"10.3390\/rs14122861","article-title":"Swin-transformer-enabled YOLOv5 with attention mechanism for small object detection on satellite images","volume":"14","author":"Gong","year":"2022","journal-title":"Rem. Sens."},{"key":"10.1016\/j.engappai.2023.107800_bib16","series-title":"Proceedings of the 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"2492","article-title":"Cascade cost volume for high-resolution multi-view stereo and stereo matching","author":"Gu","year":"2020"},{"key":"10.1016\/j.engappai.2023.107800_bib17","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1007\/s41095-022-0271-y","article-title":"Attention mechanisms in computer vision: a survey","volume":"8","author":"Guo","year":"2022","journal-title":"Computational Visual Media"},{"key":"10.1016\/j.engappai.2023.107800_bib18","series-title":"Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition","first-page":"3268","article-title":"Group-wise correlation stereo network","author":"Guo","year":"2019"},{"key":"10.1016\/j.engappai.2023.107800_bib20","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13708","article-title":"Coordinate attention for efficient mobile network design","author":"Hou","year":"2021"},{"issue":"8","key":"10.1016\/j.engappai.2023.107800_bib21","doi-asserted-by":"crossref","first-page":"2011","DOI":"10.1109\/TPAMI.2019.2913372","article-title":"Squeeze-and-Excitation networks","volume":"42","author":"Hu","year":"2020","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.engappai.2023.107800_bib22","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"2307","article-title":"SurfaceNet: an end-to-end 3D neural network for multiview stereopsis","author":"Ji","year":"2017"},{"key":"10.1016\/j.engappai.2023.107800_bib23","series-title":"Adam: A Method for Stochastic Optimization","author":"Kingma","year":"2014"},{"issue":"4","key":"10.1016\/j.engappai.2023.107800_bib24","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3072959.3073599","article-title":"Tanks and temples: benchmarking large-scale scene reconstruction","volume":"36","author":"Knapitsch","year":"2017","journal-title":"ACM Trans. Graph."},{"key":"10.1016\/j.engappai.2023.107800_bib25","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2021.108516","article-title":"ADR-MVSNet: a cascade network for 3D point cloud reconstruction with pixel occlusion","volume":"125","author":"Li","year":"2022","journal-title":"Pattern Recogn."},{"key":"10.1016\/j.engappai.2023.107800_bib26","series-title":"Learning Spatial Fusion for Single-Shot Object Detection","author":"Liu","year":"2019"},{"key":"10.1016\/j.engappai.2023.107800_bib28","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"10012","article-title":"Swin transformer: hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"10.1016\/j.engappai.2023.107800_bib29","first-page":"1","article-title":"Automatic differentiation in PCF","volume":"vol. 5","author":"Mazza","year":"2021"},{"key":"10.1016\/j.engappai.2023.107800_bib31","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"783","article-title":"Fcanet: frequency channel attention networks","author":"Qin","year":"2021"},{"key":"10.1016\/j.engappai.2023.107800_bib32","first-page":"1","article-title":"Stand-alone self-attention in vision models","volume":"32","author":"Ramachandran","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2023.107800_bib34","first-page":"5999","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"17","key":"10.1016\/j.engappai.2023.107800_bib35","doi-asserted-by":"crossref","first-page":"4244","DOI":"10.3390\/rs14174244","article-title":"HFENet: hierarchical feature extraction network for accurate landcover classification","volume":"14","author":"Wang","year":"2022","journal-title":"Rem. Sens."},{"key":"10.1016\/j.engappai.2023.107800_bib36","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"14194","article-title":"Patchmatchnet: learned multi-view patchmatch stereo","author":"Wang","year":"2021"},{"issue":"11","key":"10.1016\/j.engappai.2023.107800_bib37","doi-asserted-by":"crossref","first-page":"492","DOI":"10.3390\/horticulturae7110492","article-title":"SwinGD: a robust grape bunch detection model based on swin transformer in complex vineyard environment","volume":"7","author":"Wang","year":"2021","journal-title":"Horticulturae"},{"key":"10.1016\/j.engappai.2023.107800_bib38","doi-asserted-by":"crossref","DOI":"10.1016\/j.displa.2021.102102","article-title":"Multi-view stereo in the deep learning era: a comprehensive review","volume":"70","author":"Wang","year":"2021","journal-title":"Displays"},{"key":"10.1016\/j.engappai.2023.107800_bib39","series-title":"European Conference on Computer Vision","first-page":"573","article-title":"MVSTER: epipolar transformer for efficient multi-view stereo","author":"Wang","year":"2022"},{"key":"10.1016\/j.engappai.2023.107800_bib40","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"6187","article-title":"Aa-rmvsnet: adaptive aggregation recurrent multi-view stereo network","author":"Wei","year":"2021"},{"key":"10.1016\/j.engappai.2023.107800_bib41","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"3","article-title":"Cbam: convolutional block attention module","author":"Woo","year":"2018"},{"issue":"11","key":"10.1016\/j.engappai.2023.107800_bib42","doi-asserted-by":"crossref","first-page":"2611","DOI":"10.3390\/rs14112611","article-title":"A swin transformer-based encoding booster integrated in u-shaped network for building extraction","volume":"14","author":"Xiao","year":"2022","journal-title":"Rem. Sens."},{"key":"10.1016\/j.engappai.2023.107800_bib43","series-title":"European Conference on Computer Vision","first-page":"674","article-title":"Dense hybrid recurrent multi-view stereo net with dynamic consistency checking","author":"Yan","year":"2020"},{"key":"10.1016\/j.engappai.2023.107800_bib44","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4877","article-title":"Cost volume pyramid-based depth inference for multi-view stereo","author":"Yang","year":"2020"},{"key":"10.1016\/j.engappai.2023.107800_bib45","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"767","article-title":"Mvsnet: depth inference for unstructured multi-view stereo","author":"Yao","year":"2018"},{"key":"10.1016\/j.engappai.2023.107800_bib46","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5525","article-title":"Recurrent mvsnet for high-resolution multi-view stereo depth inference","author":"Yao","year":"2019"},{"key":"10.1016\/j.engappai.2023.107800_bib47","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"1790","article-title":"Blendedmvs: a large-scale dataset for generalized multi-view stereo networks","author":"Yao","year":"2020"},{"key":"10.1016\/j.engappai.2023.107800_bib48","series-title":"Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part IX 16","first-page":"766","article-title":"Pyramid multi-view stereo net with self-adaptive view aggregation","author":"Yi","year":"2020"},{"issue":"4","key":"10.1016\/j.engappai.2023.107800_bib49","doi-asserted-by":"crossref","first-page":"2789","DOI":"10.1007\/s11694-022-01396-0","article-title":"Swin-MLP: a strawberry appearance quality identification method by Swin Transformer and multi-layer perceptron","volume":"16","author":"Zheng","year":"2022","journal-title":"J. Food Meas. Char."},{"key":"10.1016\/j.engappai.2023.107800_bib50","series-title":"Multi-view Stereo with Transformer","author":"Zhu","year":"2021"},{"key":"10.1016\/j.engappai.2023.107800_bib51","series-title":"Deep Learning for Multi-View Stereo via Plane Sweep: A Survey","author":"Zhu","year":"2021"}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S095219762301984X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S095219762301984X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T01:27:41Z","timestamp":1766194061000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S095219762301984X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5]]},"references-count":45,"alternative-id":["S095219762301984X"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2023.107800","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2024,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"SA-MVSNet: Self-attention-based multi-view stereo network for 3D reconstruction of images with weak texture","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2023.107800","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2023 Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"107800"}}