{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T13:42:45Z","timestamp":1742996565207,"version":"3.40.3"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030377335"},{"type":"electronic","value":"9783030377342"}],"license":[{"start":{"date-parts":[[2019,12,24]],"date-time":"2019-12-24T00:00:00Z","timestamp":1577145600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-37734-2_20","type":"book-chapter","created":{"date-parts":[[2019,12,26]],"date-time":"2019-12-26T19:03:00Z","timestamp":1577386980000},"page":"241-253","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Single View Depth Estimation via Dense Convolution Network with Self-supervision"],"prefix":"10.1007","author":[{"given":"Yunhan","family":"Sun","sequence":"first","affiliation":[]},{"given":"Jinlong","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Suqin","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Qiang","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Zhengxing","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,12,24]]},"reference":[{"key":"20_CR1","unstructured":"Chakrabarti, A., Shao, J., Shakhnarovich, G.: Depth from a single image by harmonizing overcomplete local network predictions. In: Advances in Neural Information Processing Systems, pp. 2658\u20132666 (2016)"},{"key":"20_CR2","unstructured":"Chen, W., Fu, Z., Yang, D., Deng, J.: Single-image depth perception in the wild. In: Advances in Neural Information Processing Systems, pp. 730\u2013738 (2016)"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Eigen, D., Fergus, R.: Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2650\u20132658 (2015)","DOI":"10.1109\/ICCV.2015.304"},{"key":"20_CR5","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. In: Advances in Neural Information Processing Systems, pp. 2366\u20132374 (2014)"},{"key":"20_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-46484-8_45","volume-title":"Computer Vision \u2013 ECCV 2016","author":"R Garg","year":"2016","unstructured":"Garg, R., Vijay Kumar, B.G., Carneiro, G., Reid, I.: Unsupervised CNN for single view depth estimation: geometry to the rescue. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 740\u2013756. Springer, Cham (2016). \nhttps:\/\/doi.org\/10.1007\/978-3-319-46484-8_45"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The Kitti vision benchmark suite. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3354\u20133361. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"20_CR8","unstructured":"Glorot, X., Bordes, A., Bengio, Y.: Deep sparse rectifier neural networks. In: Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, pp. 315\u2013323 (2011)"},{"key":"20_CR9","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac Aodha, O., Brostow, G.J.: Unsupervised monocular depth estimation with left-right consistency. In: CVPR, p. 7 (2017)","DOI":"10.1109\/CVPR.2017.699"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Weinberger, K.Q., van der Maaten, L.: Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, p. 3 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"20_CR11","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: International Conference on Machine Learning, pp. 448\u2013456 (2015)"},{"key":"20_CR12","doi-asserted-by":"crossref","unstructured":"Jafari, O.H., Groth, O., Kirillov, A., Yang, M.Y., Rother, C.: Analyzing modular CNN architectures for joint depth prediction and semantic segmentation. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 4620\u20134627. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989537"},{"key":"20_CR13","unstructured":"Kar, A., H\u00e4ne, C., Malik, J.: Learning a multi-view stereo machine. In: Advances in Neural Information Processing Systems, pp. 364\u2013375 (2017)"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Kuznietsov, Y., St\u00fcckler, J., Leibe, B.: Semi-supervised deep learning for monocular depth map prediction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6647\u20136655 (2017)","DOI":"10.1109\/CVPR.2017.238"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Laina, I., Rupprecht, C., Belagiannis, V., Tombari, F., Navab, N.: Deeper depth prediction with fully convolutional residual networks. In: 2016 Fourth International Conference on 3D Vision (3DV), pp. 239\u2013248. IEEE (2016)","DOI":"10.1109\/3DV.2016.32"},{"key":"20_CR16","unstructured":"Li, B., Shen, C., Dai, Y., van den Hengel, A., He, M.: Depth and surface normal estimation from monocular images using regression on deep features and hierarchical CRFs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1119\u20131127 (2015)"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Li, R., Wang, S., Long, Z., Gu, D.: UnDeepVO: monocular visual odometry through unsupervised deep learning. In: 2018 IEEE International Conference on Robotics and Automation (ICRA), pp. 7286\u20137291. IEEE (2018)","DOI":"10.1109\/ICRA.2018.8461251"},{"issue":"10","key":"20_CR18","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2016","unstructured":"Liu, F., Shen, C., Lin, G., Reid, I.: Learning depth from single monocular images using deep convolutional neural fields. IEEE Trans. Pattern Anal. Mach. Intell. 38(10), 2024\u20132039 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Mahjourian, R., Wicke, M., Angelova, A.: Unsupervised learning of depth and ego-motion from monocular video using 3D geometric constraints. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5667\u20135675 (2018)","DOI":"10.1109\/CVPR.2018.00594"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Mancini, M., Costante, G., Valigi, P., Ciarfuglia, T.A.: Fast robust monocular depth estimation for obstacle detection with fully convolutional networks. In: 2016 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 4296\u20134303. IEEE (2016)","DOI":"10.1109\/IROS.2016.7759632"},{"key":"20_CR21","doi-asserted-by":"crossref","unstructured":"Mukasa, T., Xu, J., Stenger, B.: 3D scene mesh from CNN depth predictions and sparse monocular slam. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 921\u2013928 (2017)","DOI":"10.1109\/ICCVW.2017.112"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"Pillai, S., Ambrus, R., Gaidon, A.: SuperDepth: self-supervised, super-resolved monocular depth estimation. arXiv preprint \narXiv:1810.01849\n\n (2018)","DOI":"10.1109\/ICRA.2019.8793621"},{"key":"20_CR23","unstructured":"Ramirez, P.Z., Poggi, M., Tosi, F., Mattoccia, S., Di Stefano, L.: Geometry meets semantics for semi-supervised monocular depth estimation. arXiv preprint \narXiv:1810.04093\n\n (2018)"},{"key":"20_CR24","doi-asserted-by":"crossref","unstructured":"Repala, V.K., Dubey, S.R.: Dual CNN models for unsupervised monocular depth estimation. arXiv preprint \narXiv:1804.06324\n\n (2018)","DOI":"10.1007\/978-3-030-34869-4_23"},{"key":"20_CR25","unstructured":"Saxena, A., Chung, S.H., Ng, A.Y.: Learning depth from single monocular images. In: Advances in Neural Information Processing Systems, pp. 1161\u20131168 (2006)"},{"issue":"5","key":"20_CR26","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2008.132","volume":"31","author":"A Saxena","year":"2009","unstructured":"Saxena, A., Sun, M., Ng, A.Y.: Make3D: learning 3D scene structure from a single still image. IEEE Trans. Pattern Anal. Mach. Intell. 31(5), 824\u2013840 (2009)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"20_CR27","doi-asserted-by":"crossref","unstructured":"Tateno, K., Tombari, F., Laina, I., Navab, N.: CNN-SLAM: real-time dense monocular slam with learned depth prediction. arXiv preprint \narXiv:1704.03489\n\n (2017)","DOI":"10.1109\/CVPR.2017.695"},{"key":"20_CR28","doi-asserted-by":"crossref","unstructured":"Tulsiani, S., Zhou, T., Efros, A.A., Malik, J.: Multi-view supervision for single-view reconstruction via differentiable ray consistency. In: CVPR, p. 3 (2017)","DOI":"10.1109\/CVPR.2017.30"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Ummenhofer, B., et al.: DeMoN: depth and motion network for learning monocular stereo. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), vol. 5 (2017)","DOI":"10.1109\/CVPR.2017.596"},{"key":"20_CR30","unstructured":"Vijayanarasimhan, S., Ricco, S., Schmid, C., Sukthankar, R., Fragkiadaki, K.: SfM-Net: learning of structure and motion from video. arXiv preprint \narXiv:1704.07804\n\n (2017)"},{"key":"20_CR31","unstructured":"Wang, P., Shen, X., Lin, Z., Cohen, S., Price, B., Yuille, A.L.: Towards unified depth and semantic prediction from a single image. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2800\u20132809 (2015)"},{"issue":"4","key":"20_CR32","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"20_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"842","DOI":"10.1007\/978-3-319-46493-0_51","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Xie","year":"2016","unstructured":"Xie, J., Girshick, R., Farhadi, A.: Deep3D: fully automatic 2D-to-3D video conversion with deep convolutional neural networks. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 842\u2013857. Springer, Cham (2016). \nhttps:\/\/doi.org\/10.1007\/978-3-319-46493-0_51"},{"key":"20_CR34","unstructured":"Yan, X., Yang, J., Yumer, E., Guo, Y., Lee, H.: Perspective transformer nets: learning single-view 3D object reconstruction without 3D supervision. In: Advances in Neural Information Processing Systems, pp. 1696\u20131704 (2016)"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Yin, Z., Shi, J.: GeoNet: unsupervised learning of dense depth, optical flow and camera pose. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1983\u20131992 (2018)","DOI":"10.1109\/CVPR.2018.00212"},{"key":"20_CR36","doi-asserted-by":"crossref","unstructured":"Zhou, T., Brown, M., Snavely, N., Lowe, D.G.: Unsupervised learning of depth and ego-motion from video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1851\u20131858 (2017)","DOI":"10.1109\/CVPR.2017.700"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-37734-2_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,2,6]],"date-time":"2020-02-06T13:10:15Z","timestamp":1580994615000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-37734-2_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12,24]]},"ISBN":["9783030377335","9783030377342"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-37734-2_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019,12,24]]},"assertion":[{"value":"24 December 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 January 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 January 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.mmm2020.kr\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"171","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"40","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"23% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Of the 171 submissions, 46 were accepted as poster papers; of the 49 special session paper submissions, 28 were accepted for oral presentation and 8 for poster presentation; 9 demo papers and 10 VBS papers were also accepted.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}