{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T03:42:06Z","timestamp":1768534926244,"version":"3.49.0"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2023,9,4]],"date-time":"2023-09-04T00:00:00Z","timestamp":1693785600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,4]],"date-time":"2023-09-04T00:00:00Z","timestamp":1693785600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52204177"],"award-info":[{"award-number":["52204177"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-16581-6","type":"journal-article","created":{"date-parts":[[2023,9,4]],"date-time":"2023-09-04T08:07:33Z","timestamp":1693814853000},"page":"28215-28233","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Using full-scale feature fusion for self-supervised indoor depth estimation"],"prefix":"10.1007","volume":"83","author":[{"given":"Deqiang","family":"Cheng","sequence":"first","affiliation":[]},{"given":"Junhui","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Chen","family":"Lv","sequence":"additional","affiliation":[]},{"given":"Chenggong","family":"Han","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3345-9665","authenticated-orcid":false,"given":"He","family":"Jiang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,4]]},"reference":[{"key":"16581_CR1","doi-asserted-by":"crossref","unstructured":"Chibane J, Alldieck T, Pons-Moll G (2020) Implicit functions in feature space for 3d shape reconstruction and completion. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6970\u20136981","DOI":"10.1109\/CVPR42600.2020.00700"},{"key":"16581_CR2","doi-asserted-by":"crossref","unstructured":"Du R, Turner E, Dzitsiuk M, Prasso L, Duarte I, Dourgarian J, Afonso J, Pascoal J, Gladstone J, Cruces N (2020) DepthLab: Real-time 3D interaction with depth maps for mobile augmented reality. In: Proceedings of the 33rd Annual ACM Symposium on User Interface Software and Technology, pp 829\u2013843","DOI":"10.1145\/3379337.3415881"},{"key":"16581_CR3","doi-asserted-by":"crossref","unstructured":"Yin W, Liu Y, Shen C (2021) Virtual normal: enforcing geometric constraints for accurate and robust depth prediction. IEEE Trans Pattern Anal Mach Intell 44:7282\u20137295","DOI":"10.1109\/TPAMI.2021.3097396"},{"key":"16581_CR4","first-page":"3251","volume":"31","author":"C Han","year":"2022","unstructured":"Han C, Cheng D, Kou Q, Wang X, Chen L, Zhao J (2022) Self-supervised monocular Depth estimation with multi-scale structure similarity loss. Multimed Tools Appl 31:3251\u20133266","journal-title":"Multimed Tools Appl"},{"key":"16581_CR5","doi-asserted-by":"crossref","unstructured":"Lee S, Im S, Lin S,\u00a0 Kweon I.S (2021) Learning monocular depth in dynamic scenes via instance-aware projection consistency. In:Proceedings of the AAAI conference on artificial intelligence, pp 1863\u20131872","DOI":"10.1609\/aaai.v35i3.16281"},{"key":"16581_CR6","doi-asserted-by":"crossref","unstructured":"Liu L, Song X, Wang M, Liu Y, Zhang L (2021) Self-supervised monocular depth estimation for all day images using domain separation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12737\u201312746","DOI":"10.1109\/ICCV48922.2021.01250"},{"key":"16581_CR7","doi-asserted-by":"crossref","unstructured":"Wang H, Wang M, Che Z, Xu Z, Qiao X, Qi M, Feng F, Tang J (2022) RGB-Depth fusion GAN for indoor depth completion. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6209\u20136218","DOI":"10.1109\/CVPR52688.2022.00611"},{"key":"16581_CR8","doi-asserted-by":"crossref","unstructured":"Yan Z, Wang K, Li X, Zhang Z, Li J, Yang J (2022) RigNet: Repetitive image guided network for depth completion. In: European conference on computer vision, Springer, pp 214\u2013230","DOI":"10.1007\/978-3-031-19812-0_13"},{"key":"16581_CR9","doi-asserted-by":"publisher","first-page":"33759","DOI":"10.1007\/s11042-022-12301-8","volume":"81","author":"G Jung","year":"2022","unstructured":"Jung G, Yoon SM (2022) Monocular depth estimation with multi-view attention autoencoder. Multimed Tools Appl 81:33759\u201333770","journal-title":"Multimed Tools Appl"},{"key":"16581_CR10","doi-asserted-by":"publisher","first-page":"42485","DOI":"10.1007\/s11042-021-11212-4","volume":"81","author":"L Sun","year":"2022","unstructured":"Sun L, Li Y, Liu B, Xu L, Zhang Z, Zhu J (2022) Transferring knowledge from monocular completion for self-supervised monocular depth estimation. Multimed Tools Appl 81:42485\u201342495","journal-title":"Multimed Tools Appl"},{"key":"16581_CR11","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-net: Convolutional networks for biomedical image segmentation. In: International conference on medical image computing and computer-assisted intervention, Springer, pp 234\u2013241","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"16581_CR12","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"16581_CR13","doi-asserted-by":"crossref","unstructured":"Zhang H, Wu C, Zhang Z, Zhu Y, Lin H, Zhang Z, Sun Y, He T, Mueller J, Manmatha R (2022) Resnest: Split-attention networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2736\u20132746","DOI":"10.1109\/CVPRW56347.2022.00309"},{"key":"16581_CR14","doi-asserted-by":"crossref","unstructured":"Silberman N, Hoiem D, Kohli P, Fergus R (2012) Indoor segmentation and support inference from rgbd images. In: European conference on computer vision, Springer, pp 746\u2013760","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"16581_CR15","doi-asserted-by":"crossref","unstructured":"Dai A, Chang AX, Savva M, Halber M, Funkhouser T, Nie\u00dfner M (2017) Scannet: Richly-annotated 3d reconstructions of indoor scenes. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5828\u20135839","DOI":"10.1109\/CVPR.2017.261"},{"key":"16581_CR16","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2008.132","volume":"31","author":"A Saxena","year":"2008","unstructured":"Saxena A, Sun M, Ng AY (2008) Make3d: learning 3d scene structure from a single still image. IEEE Trans Pattern Anal Mach Intell 31:824\u2013840","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"16581_CR17","unstructured":"Eigen D, Puhrsch C, Fergus R (2014) Depth map prediction from a single image using a multi-scale deep network. Adv Neural Inf Process Syst 27(2):2366\u20132374"},{"key":"16581_CR18","doi-asserted-by":"crossref","unstructured":"Hu J, Ozay M, Zhang Y, Okatani T (2019) Revisiting single image depth estimation: toward higher resolution maps with accurate object boundaries. 2019 IEEE winter conference on applications of computer vision (WACV), IEEE, pp 1043\u20131051","DOI":"10.1109\/WACV.2019.00116"},{"key":"16581_CR19","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"16581_CR20","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"key":"16581_CR21","unstructured":"Lee JH, Han M-K, Ko DW, Suh IH (2019) From big to small: Multi-scale local planar guidance for monocular depth estimation, arXiv preprint arXiv:1907.10326"},{"key":"16581_CR22","unstructured":"Bhat SF, Alhashim I, Wonka P (2021) Adabins: Depth estimation using adaptive bins. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4009\u20134018"},{"key":"16581_CR23","doi-asserted-by":"crossref","unstructured":"Ranftl R, Bochkovskiy A, Koltun V (2021) Vision transformers for dense prediction. Proceedings of the IEEE\/CVF international conference on computer vision, pp 12179\u201312188","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"16581_CR24","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S (2020) An image is worth 16x16 words: Transformers for image recognition at scale, arXiv preprint arXiv:2010.11929"},{"key":"16581_CR25","doi-asserted-by":"crossref","unstructured":"Garg R, Bg VK, Carneiro G, Reid I (2016) Unsupervised cnn for single view depth estimation: Geometry to the rescue. In: European conference on computer vision, Springer, pp 740\u2013756","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"16581_CR26","doi-asserted-by":"crossref","unstructured":"Godard C, Mac Aodha O, Brostow GJ (2017) Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 270\u2013279","DOI":"10.1109\/CVPR.2017.699"},{"key":"16581_CR27","doi-asserted-by":"crossref","unstructured":"Zhou T, Brown M, Snavely N, Lowe DG (2017) Unsupervised learning of depth and ego-motion from video. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1851\u20131858","DOI":"10.1109\/CVPR.2017.700"},{"key":"16581_CR28","doi-asserted-by":"crossref","unstructured":"Godard C, Mac Aodha O, Firman M, Brostow GJ (2019) Digging into self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3828\u20133838","DOI":"10.1109\/ICCV.2019.00393"},{"key":"16581_CR29","doi-asserted-by":"crossref","unstructured":"Lyu X, Liu L, Wang M, Kong X, Liu L, Liu Y, Chen X, Yuan Y (2021) Hr-depth: High resolution self-supervised monocular depth estimation. In: Proceedings of the AAAI conference on artificial intelligence, pp 2294\u20132301","DOI":"10.1609\/aaai.v35i3.16329"},{"key":"16581_CR30","doi-asserted-by":"crossref","unstructured":"Jung H, Park E, Yoo S (2021) Fine-grained semantics-aware representation enhancement for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12642\u201312652","DOI":"10.1109\/ICCV48922.2021.01241"},{"key":"16581_CR31","doi-asserted-by":"crossref","unstructured":"Ji P, Li R, Bhanu B, Xu Y (2021) Monoindoor: Towards good practice of self-supervised monocular depth estimation for indoor environments. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12787\u201312796","DOI":"10.1109\/ICCV48922.2021.01255"},{"key":"16581_CR32","doi-asserted-by":"crossref","unstructured":"Li B, Huang Y, Liu Z, Zou D, Yu W (2021) StructDepth: Leveraging the structural regularities for self-supervised indoor depth estimation. Proceedings of the IEEE\/CVF international conference on computer vision, pp 12663\u201312673","DOI":"10.1109\/ICCV48922.2021.01243"},{"key":"16581_CR33","doi-asserted-by":"crossref","unstructured":"Yu Z, Jin L, Gao S (2020) P2Net: Patch-Match and Plane-Regularization for unsupervised indoor depth estimation. European Conference on Computer Vision, Springer, pp 206\u2013222","DOI":"10.1007\/978-3-030-58586-0_13"},{"key":"16581_CR34","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z, Bovik AC, Sheikh HR, Simoncelli EP (2004) Image quality assessment: from error visibility to structural similarity. IEEE Trans Image Process 13:600\u2013612","journal-title":"IEEE Trans Image Process"},{"key":"16581_CR35","doi-asserted-by":"crossref","unstructured":"Xie S, Girshick R, Doll\u00e1r P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1492\u20131500","DOI":"10.1109\/CVPR.2017.634"},{"key":"16581_CR36","doi-asserted-by":"crossref","unstructured":"Huang H, Lin L, Tong R, Hu H, Zhang Q, Iwamoto Y, Han X, Chen Y-W, Wu J (2020) Unet 3+: A full-scale connected unet for medical image segmentation. In: ICASSP 2020\u20132020 IEEE international conference on acoustics, speech and signal processing (ICASSP), IEEE, pp 1055\u20131059","DOI":"10.1109\/ICASSP40776.2020.9053405"},{"key":"16581_CR37","doi-asserted-by":"publisher","first-page":"1856","DOI":"10.1109\/TMI.2019.2959609","volume":"39","author":"Z Zhou","year":"2019","unstructured":"Zhou Z, Siddiquee MMR, Tajbakhsh N, Liang J (2019) Unet++: Redesigning skip connections to exploit multiscale features in image segmentation. IEEE Trans Med Imaging 39:1856\u20131867","journal-title":"IEEE Trans Med Imaging"},{"key":"16581_CR38","unstructured":"Clevert D-A, Unterthiner T, Hochreiter S (2016) Fast and accurate deep network learning by exponential linear units (elus). In: Proceedings of the International Conference on Learning Representations, pp 1\u201314"},{"key":"16581_CR39","doi-asserted-by":"crossref","unstructured":"Zhou J, Wang Y, Qin K, Zeng W (2019) Moving indoor: Unsupervised video depth learning in challenging environments. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 8618\u20138627","DOI":"10.1109\/ICCV.2019.00871"},{"key":"16581_CR40","unstructured":"Kingma DP, Ba J (2015) Adam: a method for stochastic optimization. In: Proceedings of the International Conference on Learning Representations, pp 1\u201315"},{"key":"16581_CR41","doi-asserted-by":"publisher","first-page":"3839","DOI":"10.1109\/TCSVT.2021.3118681","volume":"32","author":"Y Wei","year":"2021","unstructured":"Wei Y, Guo H, Lu J, Zhou J (2021) Iterative feature matching for self-supervised indoor depth estimation. IEEE Trans Circuits Syst Video Technol 32:3839\u20133852","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"16581_CR42","doi-asserted-by":"crossref","unstructured":"Wu C-Y, Wang J, Hall M, Neumann U, Su S (2022) Toward practical monocular indoor depth estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3814\u20133824","DOI":"10.1109\/CVPR52688.2022.00379"},{"key":"16581_CR43","doi-asserted-by":"crossref","unstructured":"Ladicky L, Shi J, Pollefeys M (2014) Pulling things out of perspective. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 89\u201396","DOI":"10.1109\/CVPR.2014.19"},{"key":"16581_CR44","doi-asserted-by":"crossref","unstructured":"Wang P, Shen X, Lin Z, Cohen S, Price B, Yuille AL (2015) Towards unified depth and semantic prediction from a single image. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2800\u20132809","DOI":"10.1109\/CVPR.2015.7298897"},{"key":"16581_CR45","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2015","unstructured":"Liu F, Shen C, Lin G, Reid I (2015) Learning depth from single monocular images using deep convolutional neural fields. IEEE Trans Pattern Anal Mach Intell 38:2024\u20132039","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"16581_CR46","doi-asserted-by":"crossref","unstructured":"Li J, Klein R, Yao A (2017) A two-streamed network for estimating fine-scaled depth maps from single rgb images. In: Proceedings of the IEEE international conference on computer vision, pp 3372\u20133380","DOI":"10.1109\/ICCV.2017.365"},{"key":"16581_CR47","doi-asserted-by":"crossref","unstructured":"Fu H, Gong M, Wang C, Batmanghelich K, Tao D (2018) Deep ordinal regression network for monocular depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2002\u20132011","DOI":"10.1109\/CVPR.2018.00214"},{"key":"16581_CR48","doi-asserted-by":"crossref","unstructured":"Zhao W, Liu S, Shu Y, Liu Y-J (2020) Towards better generalization: Joint depth-pose learning without posenet. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9151\u20139161","DOI":"10.1109\/CVPR42600.2020.00917"},{"key":"16581_CR49","unstructured":"Bian J-W, Zhan H, Wang N, Chin T-J, Shen C, Reid I (2020) Unsupervised depth learning in challenging indoor video: Weak rectification to rescue, arXiv preprint arXiv:2006.02708"},{"key":"16581_CR50","unstructured":"Trockman A, Zico Kolter J (2022) Patches are all you need?, arXiv preprint at arXiv:2201.09792"},{"key":"16581_CR51","unstructured":"Ma X, Zhou Y, Wang H (2023) Can Qin, Bin Sun, Chang Liu, Yun Fu, Image as Set of Points. In: Proceedings of the International Conference on Learning Representations, pp 1\u201318"},{"key":"16581_CR52","doi-asserted-by":"crossref","unstructured":"Wu G, Zheng W-S, Lu Y, Tian Q (2023) PSLT: A light-weight vision transformer with ladder self-attention and progressive shift. IEEE Trans Pattern Anal Mach Intell 45:11120\u201311135","DOI":"10.1109\/TPAMI.2023.3265499"},{"key":"16581_CR53","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network. Comput Sci 14.7:38\u201339"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16581-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-16581-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16581-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,29]],"date-time":"2024-02-29T10:54:47Z","timestamp":1709204087000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-16581-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,4]]},"references-count":53,"journal-issue":{"issue":"9","published-online":{"date-parts":[[2024,3]]}},"alternative-id":["16581"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-16581-6","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9,4]]},"assertion":[{"value":"19 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 June 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 August 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 September 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}