{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T01:31:11Z","timestamp":1763343071168,"version":"3.45.0"},"reference-count":48,"publisher":"Tech Science Press","issue":"3","license":[{"start":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T00:00:00Z","timestamp":1748131200000},"content-version":"vor","delay-in-days":144,"URL":"https:\/\/doi.org\/10.32604\/TSP-CROSSMARKPOLICY"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.062437","type":"journal-article","created":{"date-parts":[[2025,4,7]],"date-time":"2025-04-07T05:03:50Z","timestamp":1744002230000},"page":"4551-4573","update-policy":"https:\/\/doi.org\/10.32604\/tsp-crossmarkpolicy","source":"Crossref","is-referenced-by-count":0,"title":["Self-Supervised Monocular Depth Estimation with Scene Dynamic Pose"],"prefix":"10.32604","volume":"83","author":[{"given":"Jing","family":"He","sequence":"first","affiliation":[]},{"given":"Haonan","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Chenhao","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Minrui","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","doi-asserted-by":"crossref","first-page":"22472","DOI":"10.1038\/s41598-024-72682-8","article-title":"Lightweight monocular depth estimation using a fusion-improved transformer","volume":"14","author":"Sui","year":"2024","journal-title":"Sci Rep"},{"key":"ref2","doi-asserted-by":"crossref","unstructured":"Ding F, Wen X, Zhu Y, Li Y, Lu CX. Robust 3D occupancy prediction with 4D imaging radar. arXiv:2405.14014. 2024.","DOI":"10.52202\/079017-3222"},{"key":"ref3","series-title":"2020 IEEE International Conference on Image Processing (ICIP)","first-page":"1441","article-title":"Depth estimation from single image and semantic prior","volume":"2020","author":"Hambarde","year":"2020 Oct 25\u201328"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1007\/s00138-024-01640-1","article-title":"Self-supervised monocular depth estimation via joint attention and intelligent mask loss","volume":"36","author":"Guo","year":"2024","journal-title":"Mach Vis Appl"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"104063","DOI":"10.1016\/j.jvcir.2024.104063","article-title":"Self-supervised learning monocular depth estimation from Internet photos","volume":"99","author":"Lin","year":"2024","journal-title":"J Vis Commun Image Represent"},{"key":"ref6","first-page":"806","article-title":"S2DNet: depth estimation from single image and sparse samples","volume":"6","author":"Hambarde","year":"2020","journal-title":"IEEE Trans Comput Imag"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"104862","DOI":"10.1016\/j.engappai.2022.104862","article-title":"Self-supervised monocular depth estimation in dynamic scenes with moving instance loss","volume":"112","author":"Yue","year":"2022","journal-title":"Eng Appl Artif Intell"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"104496","DOI":"10.1016\/j.dsp.2024.104496","article-title":"Self-supervised monocular depth estimation on water scenes via specular reflection prior","volume":"149","author":"Lu","year":"2024","journal-title":"Digit Signal Process"},{"key":"ref9","unstructured":"Chen L-Z, Liu K, Lin Y, Zhu S, Li Z, Cao X, et al. Flow distillation sampling: regularizing 3D gaussians with pre-trained matching priors. arXiv:2502.07615. 2025."},{"key":"ref10","doi-asserted-by":"crossref","first-page":"e39","DOI":"10.1561\/116.00000218","article-title":"Optical flow regularization of implicit neural representations for video frame interpolation","volume":"12","author":"Zhuang","year":"2023","journal-title":"APSIPA Trans Signal Inf Process"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"4137","DOI":"10.3390\/app13074137","article-title":"Improved first-order motion model of image animation with enhanced dense motion and repair ability","volume":"13","author":"Xu","year":"2023","journal-title":"Appl Sci"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"154","DOI":"10.1007\/s11063-024-11477-4","article-title":"CFDepthNet: monocular depth estimation introducing coordinate attention and texture features","volume":"56","author":"Wei","year":"2024","journal-title":"Neural Process Lett"},{"key":"ref13","unstructured":"Guo X, Yuan W, Zhang Y, Yang T, Zhang C, Zhu Z, et al. A simple baseline for supervised surround-view depth estimation. arXiv:2303.07759. 2023."},{"key":"ref14","series-title":"2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","first-page":"10038","article-title":"Deep depth estimation from visual-inertial SLAM","volume":"2020","author":"Sartipi","year":"2020 Oct 24\u20132021 Jan 24"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"6956","DOI":"10.3390\/s21216956","article-title":"Joint soft-hard attention for self-supervised monocular depth estimation","volume":"21","author":"Fan","year":"2021","journal-title":"Sensors"},{"key":"ref16","doi-asserted-by":"crossref","unstructured":"Cheng B, Saggu IS, Shah R, Bansal G, Bharadia D. S3Net: semantic-aware self-supervised depth estimation with monocular videos and synthetic data. arXiv:2007.14511. 2020.","DOI":"10.1007\/978-3-030-58577-8_4"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"7927","DOI":"10.1007\/s40747-024-01575-0","article-title":"Repmono: a lightweight self-supervised monocular depth estimation architecture for high-speed inference","volume":"10","author":"Zhang","year":"2024","journal-title":"Complex Intell Syst"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"6547","DOI":"10.1109\/LRA.2023.3309134","article-title":"Exploring the mutual influence between self-supervised single-frame and multi-frame depth estimation","volume":"8","author":"Xiang","year":"2023","journal-title":"IEEE Robot Autom Lett"},{"key":"ref19","series-title":"5th Conference on Robot Learning (CoRL 2021)","first-page":"685","article-title":"Advancing self-supervised monocular depth learning with sparse lidar","volume":"164","author":"Feng","year":"2022"},{"key":"ref20","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"6612","article-title":"Unsupervised learning of depth and ego-motion from video","volume":"2017","author":"Zhou","year":"Jul 21\u201326, 2017"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"193","DOI":"10.3390\/ijgi13060193","article-title":"Learning effective geometry representation from videos for self-supervised monocular depth estimation","volume":"13","author":"Zhao","year":"2024","journal-title":"ISPRS Int J Geo Inf"},{"key":"ref22","unstructured":"Vijayanarasimhan S, Ricco S, Schmid C, Sukthankar R, Fragkiadaki K. SfM-Net: learning of structure and motion from video. arXiv:1704.07804. 2017."},{"key":"ref23","series-title":"2019 IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"3827","article-title":"Digging into self-supervised monocular depth estimation","volume":"2019","author":"Godard","year":"2019 Oct 27\u2013Nov 2"},{"key":"ref24","first-page":"565","author":"He","year":"2022","journal-title":"Computer vision\u2014ECCV 2022"},{"key":"ref25","series-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"1983","article-title":"GeoNet: unsupervised learning of dense depth, optical flow and camera pose","volume":"2018","author":"Yin","year":"2018 Jun 18\u201323"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"1193","DOI":"10.1007\/s00371-023-02840-y","article-title":"Self-supervised learning of monocular 3D geometry understanding with two- and three-view geometric constraints","volume":"40","author":"Liu","year":"2024","journal-title":"Vis Comput"},{"key":"ref27","first-page":"8001","article-title":"Depth prediction without the sensors: leveraging structure for unsupervised learning from monocular videos","volume":"33","author":"Casser","year":"2019","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref28","doi-asserted-by":"crossref","first-page":"26912","DOI":"10.1109\/JSEN.2021.3120753","article-title":"Swin-depth: using transformers and multi-scale fusion for monocular-based depth estimation","volume":"21","author":"Cheng","year":"2021","journal-title":"IEEE Sens J"},{"key":"ref29","series-title":"2019 IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"8976","article-title":"Depth from videos in the wild: unsupervised monocular depth learning from unknown cameras","volume":"2019","author":"Gordon","year":"2019 Oct 27\u2013Nov 2"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"103376","DOI":"10.1016\/j.cad.2022.103376","article-title":"Implicit randomized progressive-iterative approximation for curve and surface reconstruction","volume":"152","author":"Wang","year":"2022","journal-title":"Comput Aided Des"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"17450","DOI":"10.1109\/TNNLS.2023.3304291","article-title":"A fast evolutionary knowledge transfer search for multiscale deep neural architecture","volume":"35","author":"Zhang","year":"2024","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"102363","DOI":"10.1016\/j.inffus.2024.102363","article-title":"Self-supervised multi-frame depth estimation with visual-inertial pose transformer and monocular guidance","volume":"108","author":"Wang","year":"2024","journal-title":"Inf Fusion"},{"key":"ref33","unstructured":"Zhang S, Zhao C. Dyna-DepthFormer: multi-frame transformer for self-supervised depth estimation in dynamic scenes. arXiv:2301.05871. 2023."},{"key":"ref34","doi-asserted-by":"crossref","unstructured":"Shang J, Shen T, Li S, Zhou L, Zhen M, Fang T, et al. editors. Self-supervised monocular 3D face reconstruction by occlusion-aware multi-view geometry consistency. arXiv:2007.12494. 2020.","DOI":"10.1007\/978-3-030-58555-6_4"},{"key":"ref35","unstructured":"Sun Y, Xu Z, Wang X, Yao J. FlowDepth: decoupling optical flow for self-supervised monocular depth estimation. arXiv:2403.19294. 2024."},{"key":"ref36","series-title":"IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3354","article-title":"Are we ready for autonomous driving? The KITTI vision benchmark suite","volume":"2012","author":"Geiger","year":"2012 Jun 16\u201321"},{"key":"ref37","series-title":"IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"770","article-title":"Deep residual learning for image recognition","volume":"2016","author":"He","year":"2016 Jun 27\u201330"},{"key":"ref38","article-title":"Unsupervised learning of geometry from videos with edge-aware depth-normal consistency","volume":"32","author":"Yang","year":"2018","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref39","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"5667","article-title":"Unsupervised learning of depth and ego-motion from monocular video using 3D geometric constraints","volume":"2018","author":"Mahjourian","year":"2018 18\u201323"},{"key":"ref40","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"2022","article-title":"Learning depth from monocular videos using direct methods","volume":"2018","author":"Wang","year":"2018 Jun 18\u201323"},{"key":"ref41","doi-asserted-by":"crossref","unstructured":"Zou Y, Luo Z, Huang J. DF-Net: unsupervised joint learning of depth and flow using cross-task consistency. arXiv:1809.01649. 2018.","DOI":"10.1007\/978-3-030-01228-1_3"},{"key":"ref42","series-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"225","article-title":"LEGO: learning edge with geometry all at once by watching videos","volume":"2018","author":"Yang","year":"2018 Jun 18\u201323"},{"key":"ref43","series-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"12232","article-title":"Competitive collaboration: joint unsupervised learning of depth, camera motion, optical flow and motion segmentation","volume":"2019","author":"Ranjan","year":"2019 Jun 15\u201320"},{"key":"ref44","doi-asserted-by":"crossref","first-page":"2624","DOI":"10.1109\/TPAMI.2019.2930258","article-title":"Every pixel counts ++: joint learning of geometry and motion with 3D holistic understanding","volume":"42","author":"Luo","year":"2019","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref45","series-title":"2023 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)","first-page":"1267","article-title":"Rebalancing gradient to improve self-supervised co-training of depth, odometry and optical flow predictions","volume":"2023","author":"Hariat","year":"2023 Jan 2\u20137"},{"key":"ref46","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1016\/j.neucom.2022.10.073","article-title":"GCNDepth: self-supervised monocular depth estimation based on graph convolutional network","volume":"517","author":"Masoumian","year":"2023","journal-title":"Neurocomputing"},{"key":"ref47","doi-asserted-by":"crossref","first-page":"110301","DOI":"10.1016\/j.knosys.2023.110301","article-title":"SABV-Depth: a biologically inspired deep learning network for monocular depth estimation","volume":"263","author":"Wang","year":"2023","journal-title":"Knowl Based Syst"},{"key":"ref48","doi-asserted-by":"crossref","first-page":"1147","DOI":"10.1109\/TRO.2015.2463671","article-title":"ORB-SLAM: a versatile and accurate monocular SLAM system","volume":"31","author":"Mur-Artal","year":"2015","journal-title":"IEEE Trans Robot"}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-83-3\/TSP_CMC_62437\/TSP_CMC_62437.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T01:29:44Z","timestamp":1763342984000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v83n3\/61013"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":48,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.062437","relation":{},"ISSN":["1546-2226"],"issn-type":[{"type":"electronic","value":"1546-2226"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2024-12-18","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-02-26","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-05-19","order":2,"name":"published","label":"Published Online","group":{"name":"publication_history","label":"Publication History"}}]}}