{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T18:08:39Z","timestamp":1778782119208,"version":"3.51.4"},"reference-count":42,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.engappai.2026.114611","type":"journal-article","created":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T16:40:20Z","timestamp":1774716020000},"page":"114611","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Towards robust visual odometry in dynamic environments: A hybrid approach with confidence-guided masking"],"prefix":"10.1016","volume":"175","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-4458-0753","authenticated-orcid":false,"given":"Chen","family":"Xie","sequence":"first","affiliation":[]},{"given":"Ning","family":"Hao","sequence":"additional","affiliation":[]},{"given":"Haoxuan","family":"Han","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7923-3277","authenticated-orcid":false,"given":"Fenghua","family":"He","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"3","key":"10.1016\/j.engappai.2026.114611_b1","doi-asserted-by":"crossref","first-page":"5191","DOI":"10.1109\/LRA.2021.3068640","article-title":"DynaSLAM II: Tightly-coupled multi-object tracking and SLAM","volume":"6","author":"Bescos","year":"2021","journal-title":"IEEE Robot. Autom. Lett."},{"issue":"4","key":"10.1016\/j.engappai.2026.114611_b2","doi-asserted-by":"crossref","first-page":"4076","DOI":"10.1109\/LRA.2018.2860039","article-title":"DynaSLAM: Tracking, mapping, and inpainting in dynamic scenes","volume":"3","author":"Bescos","year":"2018","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.engappai.2026.114611_b3","series-title":"Better plain ViT baselines for ImageNet-1k","author":"Beyer","year":"2022"},{"key":"10.1016\/j.engappai.2026.114611_b4","doi-asserted-by":"crossref","DOI":"10.1007\/s11263-021-01484-6","article-title":"Unsupervised scale-consistent depth learning from video","author":"Bian","year":"2021","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"key":"10.1016\/j.engappai.2026.114611_b5","series-title":"European Conference on Computer Vision","first-page":"205","article-title":"Swin-unet: Unet-like pure transformer for medical image segmentation","author":"Cao","year":"2022"},{"key":"10.1016\/j.engappai.2026.114611_b6","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.109262","article-title":"Learning generalized visual odometry using position-aware optical flow and geometric bundle adjustment","volume":"136","author":"Cao","year":"2023","journal-title":"Pattern Recognit."},{"issue":"10","key":"10.1016\/j.engappai.2026.114611_b7","doi-asserted-by":"crossref","first-page":"5842","DOI":"10.3390\/app13105842","article-title":"StereoVO: Learning stereo visual odometry approach based on optical flow and depth information","volume":"13","author":"Duan","year":"2023","journal-title":"Appl. Sci."},{"key":"10.1016\/j.engappai.2026.114611_b8","series-title":"2012 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3354","article-title":"Are we ready for autonomous driving? the kitti vision benchmark suite","author":"Geiger","year":"2012"},{"key":"10.1016\/j.engappai.2026.114611_b9","series-title":"2025 IEEE International Conference on Robotics and Automation","first-page":"8738","article-title":"Lightstereo: Channel boost is all you need for efficient 2d cost aggregation","author":"Guo","year":"2025"},{"issue":"6","key":"10.1016\/j.engappai.2026.114611_b10","doi-asserted-by":"crossref","first-page":"642","DOI":"10.1177\/0278364919843996","article-title":"Complex urban dataset with multi-level sensors from highly diverse urban environments","volume":"38","author":"Jeong","year":"2019","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.engappai.2026.114611_b11","series-title":"Ultralytics YOLO11. 2024","author":"Jocher","year":"2024"},{"issue":"2","key":"10.1016\/j.engappai.2026.114611_b12","doi-asserted-by":"crossref","first-page":"4244","DOI":"10.1109\/LRA.2022.3150854","article-title":"SimVODIS++: Neural semantic visual odometry in dynamic environments","volume":"7","author":"Kim","year":"2022","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.engappai.2026.114611_b13","doi-asserted-by":"crossref","unstructured":"Lai, L., Yin, Z., Ohn-Bar, E., 2025. ZeroVO: Visual Odometry with Minimal Assumptions. In: Proceedings of the Computer Vision and Pattern Recognition Conference. pp. 17092\u201317102.","DOI":"10.1109\/CVPR52734.2025.01593"},{"key":"10.1016\/j.engappai.2026.114611_b14","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B., 2021. Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 10012\u201310022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"10.1016\/j.engappai.2026.114611_b15","doi-asserted-by":"crossref","first-page":"23772","DOI":"10.1109\/ACCESS.2021.3050617","article-title":"RDS-SLAM: Real-time dynamic SLAM using semantic segmentation methods","volume":"9","author":"Liu","year":"2021","journal-title":"IEEE Access"},{"key":"10.1016\/j.engappai.2026.114611_b16","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.111326","article-title":"Mamba model guided deep visual-inertial odometry","volume":"158","author":"Liu","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"issue":"5","key":"10.1016\/j.engappai.2026.114611_b17","doi-asserted-by":"crossref","first-page":"1255","DOI":"10.1109\/TRO.2017.2705103","article-title":"Orb-slam2: An open-source slam system for monocular, stereo, and rgb-d cameras","volume":"33","author":"Mur-Artal","year":"2017","journal-title":"IEEE Trans. Robot."},{"key":"10.1016\/j.engappai.2026.114611_b18","series-title":"Dinov2: Learning robust visual features without supervision","author":"Oquab","year":"2023"},{"issue":"3","key":"10.1016\/j.engappai.2026.114611_b19","doi-asserted-by":"crossref","first-page":"3448","DOI":"10.1109\/TITS.2022.3228042","article-title":"Deep dual-resolution networks for real-time and accurate semantic segmentation of traffic scenes","volume":"24","author":"Pan","year":"2022","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.engappai.2026.114611_b20","series-title":"MAC-VO: Metrics-aware covariance for learning-based stereo visual odometry","author":"Qiu","year":"2024"},{"key":"10.1016\/j.engappai.2026.114611_b21","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.engappai.2026.114611_b22","doi-asserted-by":"crossref","unstructured":"Ranjan, A., Jampani, V., Balles, L., Kim, K., Sun, D., Wulff, J., Black, M.J., 2019. Competitive collaboration: Joint unsupervised learning of depth, camera motion, optical flow and motion segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 12240\u201312249.","DOI":"10.1109\/CVPR.2019.01252"},{"key":"10.1016\/j.engappai.2026.114611_b23","series-title":"2023 IEEE International Conference on Robotics and Automation","first-page":"4048","article-title":"Dytanvo: Joint refinement of visual odometry and motion segmentation in dynamic environments","author":"Shen","year":"2023"},{"key":"10.1016\/j.engappai.2026.114611_b24","series-title":"European Conference on Computer Vision","first-page":"572","article-title":"Feature-metric loss for self-supervised learning of depth and egomotion","author":"Shu","year":"2020"},{"key":"10.1016\/j.engappai.2026.114611_b25","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2024.108466","article-title":"VIO-DualProNet: Visual-inertial odometry with learning based process noise covariance","volume":"133","author":"Solodar","year":"2024","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.114611_b26","series-title":"2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems","first-page":"573","article-title":"A benchmark for the evaluation of RGB-d SLAM systems","author":"Sturm","year":"2012"},{"key":"10.1016\/j.engappai.2026.114611_b27","series-title":"Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part II 16","first-page":"402","article-title":"Raft: Recurrent all-pairs field transforms for optical flow","author":"Teed","year":"2020"},{"key":"10.1016\/j.engappai.2026.114611_b28","first-page":"39033","article-title":"Deep patch visual odometry","volume":"36","author":"Teed","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114611_b29","series-title":"2017 IEEE International Conference on Robotics and Automation","first-page":"2043","article-title":"Deepvo: Towards end-to-end visual odometry with deep recurrent convolutional neural networks","author":"Wang","year":"2017"},{"key":"10.1016\/j.engappai.2026.114611_b30","series-title":"WAFT: Warping-alone field transforms for optical flow","author":"Wang","year":"2025"},{"key":"10.1016\/j.engappai.2026.114611_b31","series-title":"Conference on Robot Learning","first-page":"1761","article-title":"Tartanvo: A generalizable learning-based vo","author":"Wang","year":"2021"},{"key":"10.1016\/j.engappai.2026.114611_b32","doi-asserted-by":"crossref","unstructured":"Wei, Z., Chen, L., Jin, Y., Ma, X., Liu, T., Ling, P., Wang, B., Chen, H., Zheng, J., 2024. Stronger fewer & superior: Harnessing vision foundation models for domain generalized semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 28619\u201328630.","DOI":"10.1109\/CVPR52733.2024.02704"},{"key":"10.1016\/j.engappai.2026.114611_b33","doi-asserted-by":"crossref","unstructured":"Wen, B., Trepte, M., Aribido, J., Kautz, J., Gallo, O., Birchfield, S., 2025. Foundationstereo: Zero-shot stereo matching. In: Proceedings of the Computer Vision and Pattern Recognition Conference. pp. 5249\u20135260.","DOI":"10.1109\/CVPR52734.2025.00495"},{"key":"10.1016\/j.engappai.2026.114611_b34","series-title":"Argoverse 2: Next generation datasets for self-driving perception and forecasting","author":"Wilson","year":"2023"},{"key":"10.1016\/j.engappai.2026.114611_b35","first-page":"1","article-title":"YOLO-SLAM: A semantic SLAM system towards dynamic environment with geometric constraint","author":"Wu","year":"2022","journal-title":"Neural Comput. Appl."},{"key":"10.1016\/j.engappai.2026.114611_b36","article-title":"Swformer-VO: A monocular visual odometry model based on swin transformer","author":"Wu","year":"2024","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.engappai.2026.114611_b37","doi-asserted-by":"crossref","unstructured":"Xie, B., Cao, J., Xie, J., Khan, F.S., Pang, Y., 2024. Sed: A simple encoder-decoder for open-vocabulary semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 3426\u20133436.","DOI":"10.1109\/CVPR52733.2024.00329"},{"key":"10.1016\/j.engappai.2026.114611_b38","doi-asserted-by":"crossref","DOI":"10.1109\/LRA.2024.3477292","article-title":"Instancevo: Self-supervised semantic visual odometry by using metric learning to incorporate geometrical priors in instance objects","author":"Xie","year":"2024","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.engappai.2026.114611_b39","series-title":"2022 IEEE\/SICE International Symposium on System Integration","first-page":"225","article-title":"Maskvo: Self-supervised visual odometry with a learnable dynamic mask","author":"Xuan","year":"2022"},{"key":"10.1016\/j.engappai.2026.114611_b40","series-title":"VDO-SLAM: A visual dynamic object-aware SLAM system","author":"Zhang","year":"2020"},{"key":"10.1016\/j.engappai.2026.114611_b41","doi-asserted-by":"crossref","unstructured":"Zhou, T., Brown, M., Snavely, N., Lowe, D.G., 2017. Unsupervised learning of depth and ego-motion from video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 1851\u20131858.","DOI":"10.1109\/CVPR.2017.700"},{"key":"10.1016\/j.engappai.2026.114611_b42","series-title":"European Conference on Computer Vision","first-page":"710","article-title":"Learning monocular visual odometry via self-supervised long-term modeling","author":"Zou","year":"2020"}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626008924?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626008924?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T17:15:45Z","timestamp":1778778945000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626008924"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":42,"alternative-id":["S0952197626008924"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114611","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Towards robust visual odometry in dynamic environments: A hybrid approach with confidence-guided masking","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114611","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114611"}}