{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T03:36:05Z","timestamp":1773977765203,"version":"3.50.1"},"reference-count":55,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100002837","name":"Chang&apos;an University","doi-asserted-by":"publisher","award":["300102405101"],"award-info":[{"award-number":["300102405101"]}],"id":[{"id":"10.13039\/501100002837","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100017550","name":"Shaanxi Science and Technology Association","doi-asserted-by":"publisher","award":["20250463"],"award-info":[{"award-number":["20250463"]}],"id":[{"id":"10.13039\/501100017550","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100011710","name":"Shaanxi Provincial Science and Technology Department","doi-asserted-by":"publisher","award":["2025JC-YBQN-871"],"award-info":[{"award-number":["2025JC-YBQN-871"]}],"id":[{"id":"10.13039\/501100011710","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Advanced Engineering Informatics"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1016\/j.aei.2026.104339","type":"journal-article","created":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T18:41:36Z","timestamp":1768416096000},"page":"104339","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PB","title":["Depth-guided cross-modal fusion and diffusion-based enhancement for robust pavement defect segmentation"],"prefix":"10.1016","volume":"71","author":[{"given":"Yihui","family":"Shan","sequence":"first","affiliation":[]},{"given":"Wei","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jiaqi","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Yansong","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Zhenzhen","family":"Xing","sequence":"additional","affiliation":[]},{"given":"Jiangang","family":"Ding","sequence":"additional","affiliation":[]},{"given":"Lili","family":"Pei","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.aei.2026.104339_b1","doi-asserted-by":"crossref","DOI":"10.1016\/j.conbuildmat.2021.125991","article-title":"Evaluation of inverted pavement by structural condition indicators from falling weight deflectometer","volume":"319","author":"Jiang","year":"2022","journal-title":"Constr. Build. Mater."},{"issue":"16","key":"10.1016\/j.aei.2026.104339_b2","doi-asserted-by":"crossref","first-page":"2481","DOI":"10.1111\/mice.13200","article-title":"AI-enabled airport runway pavement distress detection using dashcam imagery","volume":"39","author":"Malekloo","year":"2024","journal-title":"Comput-Aided Civ. Infrastruct. Eng."},{"issue":"4","key":"10.1016\/j.aei.2026.104339_b3","article-title":"Deep learning-based RGB-D fusion for multimodal condition assessment of civil infrastructure","volume":"37","author":"Mondal","year":"2023","journal-title":"J. Comput. Civ. Eng."},{"key":"10.1016\/j.aei.2026.104339_b4","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2024.102903","article-title":"Modal-invariant progressive representation for multimodal image registration","volume":"117","author":"Ding","year":"2025","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.aei.2026.104339_b5","doi-asserted-by":"crossref","DOI":"10.1016\/j.autcon.2021.103605","article-title":"Crack segmentation through deep convolutional neural networks and heterogeneous image fusion","volume":"125","author":"Zhou","year":"2021","journal-title":"Autom. Constr."},{"key":"10.1016\/j.aei.2026.104339_b6","series-title":"Explicit attention-enhanced fusion for RGB-thermal perception tasks","author":"Liang","year":"2023"},{"key":"10.1016\/j.aei.2026.104339_b7","series-title":"Computer Vision \u2013 ECCV 2020","first-page":"340","article-title":"SNE-RoadSeg: Incorporating surface normal information into semantic segmentation for accurate freespace detection","author":"Fan","year":"2020"},{"key":"10.1016\/j.aei.2026.104339_b8","series-title":"Understanding dark scenes by contrasting multi-modal observations","author":"Dong","year":"2023"},{"key":"10.1016\/j.aei.2026.104339_b9","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","first-page":"7608","article-title":"AsymFormer: Asymmetrical cross-modal representation learning for mobile platform real-time RGB-D semantic segmentation","author":"Du","year":"2024"},{"key":"10.1016\/j.aei.2026.104339_b10","series-title":"Segment anything","author":"Kirillov","year":"2023"},{"key":"10.1016\/j.aei.2026.104339_b11","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2025.103709","article-title":"Generalist models in medical image segmentation: A survey and performance comparison with task-specific approaches","volume":"127","author":"Moglia","year":"2026","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.aei.2026.104339_b12","series-title":"2025 IEEE\/CVF International Conference on Computer Vision","article-title":"Anomaly detection of integrated circuits package substrates using the large vision model SAIC: Dataset construction, methodology, and application","author":"Yu","year":"2025"},{"key":"10.1016\/j.aei.2026.104339_b13","doi-asserted-by":"crossref","DOI":"10.1016\/j.autcon.2025.106497","article-title":"Bimodal defect segmentation with geometric prior-supported anti-imbalance learning for pavement defect evaluation and repair","volume":"180","author":"Wang","year":"2025","journal-title":"Autom. Constr."},{"key":"10.1016\/j.aei.2026.104339_b14","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110685","article-title":"An empirical study on the robustness of the segment anything model (SAM)","volume":"155","author":"Wang","year":"2024","journal-title":"Pattern Recognit."},{"issue":"2","key":"10.1016\/j.aei.2026.104339_b15","doi-asserted-by":"crossref","first-page":"299","DOI":"10.1109\/TPAMI.2007.1176","article-title":"Automatic estimation and removal of noise from a single image","volume":"30","author":"Liu","year":"2008","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.aei.2026.104339_b16","series-title":"RobustSAM: Segment anything robustly on degraded images","author":"Chen","year":"2024"},{"key":"10.1016\/j.aei.2026.104339_b17","series-title":"Denoising diffusion probabilistic models","author":"Ho","year":"2020"},{"key":"10.1016\/j.aei.2026.104339_b18","series-title":"High-resolution image synthesis with latent diffusion models","author":"Rombach","year":"2022"},{"key":"10.1016\/j.aei.2026.104339_b19","series-title":"Deep multimodal fusion by channel exchanging","author":"Wang","year":"2020"},{"key":"10.1016\/j.aei.2026.104339_b20","series-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"12176","article-title":"Multimodal token fusion for vision transformers","author":"Wang","year":"2022"},{"key":"10.1016\/j.aei.2026.104339_b21","doi-asserted-by":"crossref","DOI":"10.1016\/j.jvcir.2023.103991","article-title":"Indoor semantic segmentation based on swin-transformer","volume":"98","author":"Zheng","year":"2024","journal-title":"J. Vis. Commun. Image Represent."},{"key":"10.1016\/j.aei.2026.104339_b22","series-title":"GeminiFusion: Efficient pixel-wise multimodal fusion for vision transformer","author":"Jia","year":"2024"},{"key":"10.1016\/j.aei.2026.104339_b23","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2020.104042","article-title":"Deep multimodal fusion for semantic image segmentation: A survey","volume":"105","author":"Zhang","year":"2021","journal-title":"Image Vis. Comput."},{"issue":"3","key":"10.1016\/j.aei.2026.104339_b24","doi-asserted-by":"crossref","first-page":"1223","DOI":"10.1109\/TCSVT.2022.3208833","article-title":"RGB-T semantic segmentation with location, activation, and sharpening","volume":"33","author":"Li","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.aei.2026.104339_b25","first-page":"1","article-title":"A multilevel multimodal fusion transformer for remote sensing semantic segmentation","volume":"62","author":"Ma","year":"2024","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.aei.2026.104339_b26","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","first-page":"7608","article-title":"AsymFormer: Asymmetrical cross-modal representation learning for mobile platform real-time RGB-D semantic segmentation","author":"Du","year":"2024"},{"key":"10.1016\/j.aei.2026.104339_b27","first-page":"1","article-title":"A unified framework with multimodal fine-tuning for remote sensing semantic segmentation","volume":"63","author":"Ma","year":"2025","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.aei.2026.104339_b28","series-title":"2024 IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"829","article-title":"Understanding dark scenes by contrasting multi-modal observations","author":"Dong","year":"2023"},{"key":"10.1016\/j.aei.2026.104339_b29","doi-asserted-by":"crossref","DOI":"10.1016\/j.autcon.2024.105838","article-title":"Crack instance segmentation using splittable transformer and position coordinates","volume":"168","author":"Zhao","year":"2024","journal-title":"Autom. Constr."},{"key":"10.1016\/j.aei.2026.104339_b30","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.103669","article-title":"Optimizing concrete surface defect detection with adaptive supervision and scribble annotations","volume":"68","author":"Wang","year":"2025","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104339_b31","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.103536","article-title":"Limited label-support pavement damage segmentation network with uniform rectification and intrinsic cross-dimensional constraint","volume":"67","author":"Yan","year":"2025","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104339_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2024.102665","article-title":"Pavement raveling inspection using a new image texture-based feature set and artificial intelligence","volume":"62","author":"Nasertork","year":"2024","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104339_b33","doi-asserted-by":"crossref","DOI":"10.1016\/j.measurement.2020.108698","article-title":"RENet: Rectangular convolution pyramid and edge enhancement network for salient object detection of pavement cracks","volume":"170","author":"Wang","year":"2021","journal-title":"Measurement"},{"key":"10.1016\/j.aei.2026.104339_b34","doi-asserted-by":"crossref","DOI":"10.1016\/j.autcon.2023.105192","article-title":"CNN-based pavement defects detection using grey and depth images","volume":"158","author":"Li","year":"2024","journal-title":"Autom. Constr."},{"key":"10.1016\/j.aei.2026.104339_b35","doi-asserted-by":"crossref","DOI":"10.1016\/j.autcon.2024.105756","article-title":"Self-adaptive 2D3D image fusion for automated pixel-level pavement crack detection","volume":"168","author":"Jing","year":"2024","journal-title":"Autom. Constr."},{"key":"10.1016\/j.aei.2026.104339_b36","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.103674","article-title":"MKGM: Multimodal knowledge-guided joint recognition of bridge defect-structural information","volume":"68","author":"Yang","year":"2025","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.aei.2026.104339_b37","series-title":"Computer Vision \u2013 ECCV 2020 Workshops","first-page":"285","article-title":"We learn better road pothole detection: From attention aggregation to adversarial domain adaptation","author":"Fan","year":"2020"},{"issue":"4","key":"10.1016\/j.aei.2026.104339_b38","doi-asserted-by":"crossref","first-page":"4687","DOI":"10.1109\/TIV.2024.3376534","article-title":"Segmentation of road negative obstacles based on dual semantic-feature complementary fusion for autonomous driving","volume":"9","author":"Feng","year":"2024","journal-title":"IEEE Trans. Intell. Veh."},{"issue":"8","key":"10.1016\/j.aei.2026.104339_b39","doi-asserted-by":"crossref","first-page":"4871","DOI":"10.1109\/LRA.2023.3272517","article-title":"InconSeg: Residual-guided fusion with inconsistent multi-modal data for negative and positive road obstacles segmentation","volume":"8","author":"Feng","year":"2023","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.aei.2026.104339_b40","series-title":"SAM 2: Segment anything in images and videos","author":"Ravi","year":"2024"},{"key":"10.1016\/j.aei.2026.104339_b41","series-title":"Segment anything in high quality","author":"Ke","year":"2023"},{"key":"10.1016\/j.aei.2026.104339_b42","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3699","article-title":"ASAM: Boosting segment anything model with adversarial tuning","author":"Li","year":"2024"},{"key":"10.1016\/j.aei.2026.104339_b43","series-title":"DiffusionDet: Diffusion model for object detection","author":"Chen","year":"2023"},{"key":"10.1016\/j.aei.2026.104339_b44","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2024.102540","article-title":"FDiff-fusion: Denoising diffusion fusion network based on fuzzy learning for 3D medical image segmentation","volume":"112","author":"Ding","year":"2024","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.aei.2026.104339_b45","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2024.102870","article-title":"LG-diff: Learning to follow local class-regional guidance for nearshore image cross-modality high-quality translation","volume":"117","author":"Ding","year":"2025","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.aei.2026.104339_b46","series-title":"MedSegDiff-V2: Diffusion based medical image segmentation with transformer","author":"Wu","year":"2023"},{"key":"10.1016\/j.aei.2026.104339_b47","series-title":"Unleashing text-to-image diffusion models for visual perception","author":"Zhao","year":"2023"},{"key":"10.1016\/j.aei.2026.104339_b48","series-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"key":"10.1016\/j.aei.2026.104339_b49","series-title":"APSeg: Auto-prompt network for cross-domain few-shot semantic segmentation","author":"He","year":"2024"},{"key":"10.1016\/j.aei.2026.104339_b50","series-title":"2025 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"2366","article-title":"Segment any-quality images with generative latent space enhancement","author":"Guo","year":"2025"},{"key":"10.1016\/j.aei.2026.104339_b51","series-title":"Masked-attention mask transformer for universal image segmentation","author":"Cheng","year":"2022"},{"key":"10.1016\/j.aei.2026.104339_b52","article-title":"Round-the-clock all-in-one automatic defect perception: Frequency-driven fusion for generalized pavements","author":"Shan","year":"2025","journal-title":"IEEE Trans. Autom. Sci. Eng."},{"key":"10.1016\/j.aei.2026.104339_b53","series-title":"MH\/T 5024-2019: Technical Specifications for Evaluation and Management of Civil Airport Pavements","author":"Civil Aviation Administration of China","year":"2019"},{"key":"10.1016\/j.aei.2026.104339_b54","series-title":"2023 IEEE\/CVF International Conference on Computer Vision Workshops","first-page":"3359","article-title":"SAM-adapter: Adapting segment anything in underperformed scenes","author":"Chen","year":"2023"},{"key":"10.1016\/j.aei.2026.104339_b55","series-title":"Fused Image dataset for convolutional neural Network-based crack Detection (FIND)","author":"Zhou","year":"2022"}],"container-title":["Advanced Engineering Informatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1474034626000315?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1474034626000315?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T02:43:50Z","timestamp":1773974630000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1474034626000315"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":55,"alternative-id":["S1474034626000315"],"URL":"https:\/\/doi.org\/10.1016\/j.aei.2026.104339","relation":{},"ISSN":["1474-0346"],"issn-type":[{"value":"1474-0346","type":"print"}],"subject":[],"published":{"date-parts":[[2026,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Depth-guided cross-modal fusion and diffusion-based enhancement for robust pavement defect segmentation","name":"articletitle","label":"Article Title"},{"value":"Advanced Engineering Informatics","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.aei.2026.104339","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"104339"}}