{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:27:57Z","timestamp":1740122877171,"version":"3.37.3"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"25","license":[{"start":{"date-parts":[[2022,6,2]],"date-time":"2022-06-02T00:00:00Z","timestamp":1654128000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,6,2]],"date-time":"2022-06-02T00:00:00Z","timestamp":1654128000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62071500"],"award-info":[{"award-number":["62071500"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1007\/s11042-021-11500-z","type":"journal-article","created":{"date-parts":[[2022,6,2]],"date-time":"2022-06-02T03:49:16Z","timestamp":1654141756000},"page":"35899-35913","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Self-distillation framework for indoor and outdoor monocular depth estimation"],"prefix":"10.1007","volume":"81","author":[{"given":"Meng","family":"Pan","sequence":"first","affiliation":[]},{"given":"Huanrong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jiahao","family":"Wu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9670-7366","authenticated-orcid":false,"given":"Zhi","family":"Jin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,6,2]]},"reference":[{"key":"11500_CR1","unstructured":"Anil R, Pereyra G, Passos A, Ormandi R, Dahl GE, Hinton GE (2018) Large scale distributed neural network training through online distillation. In: ICLR"},{"key":"11500_CR2","unstructured":"Bhoi A (2019) Monocular depth estimation: A survey. In: arXiv preprint at\u00a0arXiv:1412.6572. Accessed 15 Jan 2021"},{"issue":"11","key":"11500_CR3","doi-asserted-by":"publisher","first-page":"3174","DOI":"10.1109\/TCSVT.2017.2740321","volume":"28","author":"Y Cao","year":"2017","unstructured":"Cao Y, Wu Z, Shen C (2017) Estimating depth from monocular images as classification using deep fully convolutional residual networks. IEEE Transactions on Circuits and Systems for Video Technology 28(11):3174\u20133182","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"issue":"2","key":"11500_CR4","first-page":"70","volume":"29","author":"ZL Cao","year":"2015","unstructured":"Cao ZL, Yan ZH, Wang H (2015) Summary of binocular stereo vision matching technology. Journal of Chongqing University of Technology (Natural Science) 29(2):70\u201375","journal-title":"Journal of Chongqing University of Technology (Natural Science)"},{"key":"11500_CR5","doi-asserted-by":"publisher","unstructured":"Chen P, Liu AH, Liu Y, Wang, YF (2019) Towards scene understanding: Unsupervised monocular depth estimation with semantic-aware representation. In: CVPR, pp 2619\u20132627. https:\/\/doi.org\/10.1109\/CVPR.2019.00273","DOI":"10.1109\/CVPR.2019.00273"},{"key":"11500_CR6","unstructured":"Chen W, Fu Z, Yang D, Deng J (2016) Single-image depth perception in the wild. In: Lee D, Sugiyama M, Luxburg U, Guyon I, Garnett R (eds) NIPS, 29, pp. 730\u2013738"},{"key":"11500_CR7","doi-asserted-by":"publisher","unstructured":"Dai A, Nie\u00dfner M, Zollh\u00f6fer M, Izadi S, Theobalt C (2017) Bundlefusion: Real-time globally consistent 3d reconstruction using on-the-fly surface reintegration. ACM Trans Graph 36(3). https:\/\/doi.org\/10.1145\/3054739","DOI":"10.1145\/3054739"},{"key":"11500_CR8","doi-asserted-by":"crossref","unstructured":"Droeschel D, Behnke S (2017) Mrslasermap: Local multiresolution grids for efficient 3d laser mapping and localization. In: Behnke S, Sheh R, Sar$$\\backslash$$iel, S, Lee DD (eds. RoboCup. Springer International Publishing, pp 319\u2013326","DOI":"10.1007\/978-3-319-68792-6_26"},{"key":"11500_CR9","doi-asserted-by":"publisher","unstructured":"Eigen D, Fergus R (2015) Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture. In: ICCV, pp 2650\u20132658. https:\/\/doi.org\/10.1109\/ICCV.2015.304","DOI":"10.1109\/ICCV.2015.304"},{"key":"11500_CR10","unstructured":"Eigen D, Puhrsch C, Fergus R (2014) Depth map prediction from a single image using a multi-scale deep network. In: In NIPS, pp 2366\u20132374"},{"key":"11500_CR11","doi-asserted-by":"publisher","unstructured":"Fu H, Gong M, Wang C, Batmanghelich K, Tao D (2018) Deep ordinal regression network for monocular depth estimation. In: CVPR, pp 2002\u20132011. https:\/\/doi.org\/10.1109\/CVPR.2018.00214","DOI":"10.1109\/CVPR.2018.00214"},{"key":"11500_CR12","doi-asserted-by":"crossref","unstructured":"Garg R, Bg VK, Carneiro G, Reid I (2016) Unsupervised cnn for single view depth estimation: Geometry to the rescue. In: ECCV. Springer, pp 740\u2013756","DOI":"10.1007\/978-3-319-46484-8_45"},{"issue":"11","key":"11500_CR13","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger A, Lenz P, Stiller C, Urtasun R (2013) Vision meets robotics: The kitti dataset. The International Journal of Robotics Research 32(11):1231\u20131237","journal-title":"The International Journal of Robotics Research"},{"key":"11500_CR14","doi-asserted-by":"crossref","unstructured":"Godard C, Mac Aodha O, Brostow GJ (2017) Unsupervised monocular depth estimation with left-right consistency. In: CVPR, pp 270\u2013279","DOI":"10.1109\/CVPR.2017.699"},{"key":"11500_CR15","doi-asserted-by":"publisher","unstructured":"Godard C, Mac Aodha O, Firman M, Brostow GJ (2019) Digging into self-supervised monocular depth estimation. In: ICCV, pp 3827\u20133837. https:\/\/doi.org\/10.1109\/ICCV.2019.00393","DOI":"10.1109\/ICCV.2019.00393"},{"key":"11500_CR16","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531"},{"issue":"6","key":"11500_CR17","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks. Commun. ACM 60(6):84\u201390. https:\/\/doi.org\/10.1145\/3065386","journal-title":"Commun. ACM"},{"key":"11500_CR18","doi-asserted-by":"publisher","unstructured":"Kuznietsov Y, St\u00fcckler J, Leibe B (2017) Semi-supervised deep learning for monocular depth map prediction. In: CVPR, pp. 2215\u20132223. https:\/\/doi.org\/10.1109\/CVPR.2017.238","DOI":"10.1109\/CVPR.2017.238"},{"key":"11500_CR19","doi-asserted-by":"crossref","unstructured":"Laina I, Rupprecht C, Belagiannis V, Tombari F, Navab N (2016) Deeper depth prediction with fully convolutional residual networks. In: 3DV. IEEE. pp 239\u2013248","DOI":"10.1109\/3DV.2016.32"},{"key":"11500_CR20","unstructured":"Lee JH, Han M, Ko DW, Suh IH (2019) From big to small: Multi-scale local planar guidance for monocular depth estimation. In: arXiv"},{"key":"11500_CR21","doi-asserted-by":"crossref","unstructured":"Lee JH, Kim CS (2019) Monocular depth estimation using relative depth maps. In: CVPR, pp 9729\u20139738","DOI":"10.1109\/CVPR.2019.00996"},{"key":"11500_CR22","unstructured":"Li B, Shen C, Dai Y, Van Den Hengel A, He M (2015) Depth and surface normal estimation from monocular images using regression on deep features and hierarchical crfs. In: CVPR, pp 1119\u20131127"},{"key":"11500_CR23","doi-asserted-by":"crossref","unstructured":"Li R, Xian K, Shen C, Cao Z, Lu H, Hang L (2018) Deep attention-based classification network for robust depth prediction. In: Jawahar C, Li H, Mori G, Schindler K (eds) ACCV. Springer, pp 663\u2013678","DOI":"10.1007\/978-3-030-20870-7_41"},{"key":"11500_CR24","doi-asserted-by":"crossref","unstructured":"Liu F, Shen C, Lin G (2015) Deep convolutional neural fields for depth estimation from a single image. In: CVPR, pp 5162\u20135170","DOI":"10.1109\/CVPR.2015.7299152"},{"issue":"10","key":"11500_CR25","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2015","unstructured":"Liu F, Shen C, Lin G, Reid I (2015) Learning depth from single monocular images using deep convolutional neural fields. IEEE transactions on pattern analysis and machine intelligence 38(10):2024\u20132039","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"11500_CR26","doi-asserted-by":"publisher","unstructured":"Mousavian A, Pirsiavash H, Ko\u0161eck\u00e1 J (2016) Joint semantic segmentation and depth estimation with deep convolutional networks. In: 3DV, pp 611\u2013619. https:\/\/doi.org\/10.1109\/3DV.2016.69","DOI":"10.1109\/3DV.2016.69"},{"key":"11500_CR27","unstructured":"Nathan Silberman Derek Hoiem PK, Fergus R Indoor segmentation and support inference from rgbd images.\u00a0https:\/\/cs.nyu.edu\/~silberman\/datasets\/nyu_depth_v2.html. Accessed 15 Jan 2021"},{"key":"11500_CR28","doi-asserted-by":"crossref","unstructured":"Nathan Silberman Derek Hoiem PK, Fergus R (2012) Indoor segmentation and support inference from rgbd images. In: ECCV, pp 746\u2013760","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"11500_CR29","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, Killeen T, Lin Z, Gimelshein N, Antiga L., et al (2019) Pytorch: An imperative style, high-performance deep learning library. In: NIPS, pp 8026\u20138037"},{"key":"11500_CR30","doi-asserted-by":"publisher","unstructured":"Wang P, Shen X, Lin Z, Cohen S, Price B, Yuille A (2015) owards unified depth and semantic prediction from a single image. In: CVPR, pp 2800\u20132809. https:\/\/doi.org\/10.1109\/CVPR.2015.7298897","DOI":"10.1109\/CVPR.2015.7298897"},{"key":"11500_CR31","doi-asserted-by":"publisher","unstructured":"Poggi M, Aleotti F, Tosi F, Mattoccia S (2020) On the uncertainty of self-supervised monocular depth estimation. In: CVPR, pp 3224\u20133234. https:\/\/doi.org\/10.1109\/CVPR42600.2020.00329","DOI":"10.1109\/CVPR42600.2020.00329"},{"key":"11500_CR32","doi-asserted-by":"publisher","unstructured":"Qi CR, Liu W, Wu C, Su H, Guibas LJ (2018) Frustum pointnets for 3d object detection from rgb-d data. In: CVPR, pp 918\u2013927. https:\/\/doi.org\/10.1109\/CVPR.2018.00102","DOI":"10.1109\/CVPR.2018.00102"},{"key":"11500_CR33","unstructured":"Romero A, Ballas N, Kahou SE, Chassang A, Gatta C, Bengio Y (2014) Fitnets: Hints for thin deep nets. arXiv preprint arXiv:1412.6550. Accessed 15 Jan 2021"},{"key":"11500_CR34","unstructured":"Saxena A, Chung SH, Ng AY (2006) Learning depth from single monocular images. In: NIPS, pp 1161\u20131168"},{"key":"11500_CR35","doi-asserted-by":"publisher","unstructured":"Shi S, Wang X, Li H (2019) Pointrcnn: 3d object proposal generation and detection from point cloud. In: CVPR, pp 770\u2013779. https:\/\/doi.org\/10.1109\/CVPR.2019.00086","DOI":"10.1109\/CVPR.2019.00086"},{"issue":"1153","key":"11500_CR36","first-page":"405","volume":"203","author":"S Ullman","year":"1979","unstructured":"Ullman S (1979) The interpretation of structure from motion. Royal Society of London 203(1153):405\u2013426","journal-title":"Royal Society of London"},{"key":"11500_CR37","doi-asserted-by":"publisher","unstructured":"Weder S, Sch\u00f6nberger J, Pollefeys M, Oswald MR (2020) Routedfusion: Learning real-time depth map fusion. In: CVPR, pp 4886\u20134896. https:\/\/doi.org\/10.1109\/CVPR42600.2020.00494","DOI":"10.1109\/CVPR42600.2020.00494"},{"key":"11500_CR38","doi-asserted-by":"crossref","unstructured":"Whelan T, Salas-Moreno RF, Glocker B, Davison AJ, Leutenegger S (2016) Elasticfusion: Dense slam without a pose graph. Robotics: Science Systems 35(14), 1697\u20131716. https:\/doi.org\/10.1177\/0278364916669237","DOI":"10.1177\/0278364916669237"},{"key":"11500_CR39","doi-asserted-by":"crossref","unstructured":"Xu D, Wang W, Tang H, Liu H, Sebe N, Ricci E (2018) Structured attention guided convolutional neural fields for monocular depth estimation. In: CVPR, pp 3917\u20133925","DOI":"10.1109\/CVPR.2018.00412"},{"key":"11500_CR40","doi-asserted-by":"crossref","unstructured":"Yoneda K, Tehrani H, Ogawa T, Hukuyama N, Mita S (2014) Lidar scan feature for localization with highly precise 3-d map. In: Intelligent Vehicles Symposium Proceedings. IEEE pp 1345\u20131350","DOI":"10.1109\/IVS.2014.6856596"},{"key":"11500_CR41","doi-asserted-by":"publisher","unstructured":"Zhang L, Song J, Gao A, Chen J, Bao C, Ma K (2019) Be your own teacher: Improve the performance of convolutional neural networks via self distillation. In: ICCV, pp 3712\u20133721. https:\/\/doi.org\/10.1109\/ICCV.2019.00381","DOI":"10.1109\/ICCV.2019.00381"},{"issue":"2","key":"11500_CR42","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/MMUL.2012.24","volume":"19","author":"Z Zhang","year":"2012","unstructured":"Zhang Z (2012) Microsoft kinect sensor and its effect. IEEE multimedia 19(2):4\u201310. https:\/\/doi.org\/10.1109\/MMUL.2012.24","journal-title":"IEEE multimedia"},{"key":"11500_CR43","doi-asserted-by":"publisher","first-page":"1612","DOI":"10.1007\/s11431-020-1582-8","volume":"63","author":"C Zhao","year":"2017","unstructured":"Zhao C, Sun Q, Zhang C, Tang Y, Qian F (2017) Monocular depth estimation based on deep learning: An overview. Science China Technological Sciences 63:1612\u20131627","journal-title":"Science China Technological Sciences"},{"key":"11500_CR44","doi-asserted-by":"publisher","unstructured":"Zhou, T., Brown, M., Snavely, N., Lowe, D.G.: Unsupervised learning of depth and ego-motion from video. In: CVPR, pp. 6612\u20136619 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.700","DOI":"10.1109\/CVPR.2017.700"},{"key":"11500_CR45","first-page":"185","volume-title":"International Conference on Audio","author":"L Zou","year":"2010","unstructured":"Zou L, Li Y (2010) A method of stereo vision matching based on opencv. International Conference on Audio. Language and Image Processing, IEEE, pp 185\u2013190"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11500-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-021-11500-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11500-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,24]],"date-time":"2022-09-24T04:19:01Z","timestamp":1663993141000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-021-11500-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,2]]},"references-count":45,"journal-issue":{"issue":"25","published-print":{"date-parts":[[2022,10]]}},"alternative-id":["11500"],"URL":"https:\/\/doi.org\/10.1007\/s11042-021-11500-z","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2022,6,2]]},"assertion":[{"value":"23 March 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 July 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 August 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 June 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}