{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T22:15:43Z","timestamp":1762035343137,"version":"build-2065373602"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2021,3,22]],"date-time":"2021-03-22T00:00:00Z","timestamp":1616371200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,3,22]],"date-time":"2021-03-22T00:00:00Z","timestamp":1616371200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2021,8]]},"DOI":"10.1007\/s11554-021-01092-0","type":"journal-article","created":{"date-parts":[[2021,3,22]],"date-time":"2021-03-22T11:03:01Z","timestamp":1616410981000},"page":"1357-1368","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Efficient unsupervised monocular depth estimation using attention guided generative adversarial network"],"prefix":"10.1007","volume":"18","author":[{"given":"Sumanta","family":"Bhattacharyya","sequence":"first","affiliation":[]},{"given":"Ju","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Stephen","family":"Welch","sequence":"additional","affiliation":[]},{"given":"Chen","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,3,22]]},"reference":[{"key":"1092_CR1","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. In: Advances in neural information processing systems, pp. 
2366\u20132374 (2014)"},{"key":"1092_CR2","doi-asserted-by":"crossref","unstructured":"Xu, D., Wang, W., Tang, H., Liu, H., Sebe, N., Ricci, E.: Structured attention guided convolutional neural fields for monocular depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3917\u20133925 (2018)","DOI":"10.1109\/CVPR.2018.00412"},{"key":"1092_CR3","doi-asserted-by":"crossref","unstructured":"Godard, C., Aodha, O. M., Brostow, G. J.: Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 270\u2013279 (2017)","DOI":"10.1109\/CVPR.2017.699"},{"key":"1092_CR4","doi-asserted-by":"crossref","unstructured":"Pilzer, A., Xu, D., Puscas, M., Ricci, E., Sebe, N.: Unsupervised adversarial depth estimation using cycled generative networks. In: 2018 International conference on 3D vision (3DV), IEEE, pp. 587\u2013595 (2018)","DOI":"10.1109\/3DV.2018.00073"},{"key":"1092_CR5","doi-asserted-by":"crossref","unstructured":"Mehta, S., Rastegari, M., Shapiro, L., Hajishirzi, H.: Espnetv2: a light-weight, power efficient, and general purpose convolutional neural network. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 9190\u20139200 (2019)","DOI":"10.1109\/CVPR.2019.00941"},{"issue":"1\u20133","key":"1092_CR6","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1023\/A:1014573219977","volume":"47","author":"D Scharstein","year":"2002","unstructured":"Scharstein, D., Szeliski, R.: A taxonomy and evaluation of dense two-frame stereo correspondence algorithms. Int. J. Comput. Vis. 47(1\u20133), 7\u201342 (2002)","journal-title":"Int. J. Comput. Vis."},{"key":"1092_CR7","doi-asserted-by":"crossref","unstructured":"Flynn, J., Neulander, I., Philbin, J., Snavely, N.: Deepstereo: learning to predict new views from the world\u2019s imagery. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 5515\u20135524 (2016)","DOI":"10.1109\/CVPR.2016.595"},{"key":"1092_CR8","doi-asserted-by":"crossref","unstructured":"Saxena, A., Sun, M., Ng, A. Y.: Learning 3-d scene structure from a single still image. In: 2007 IEEE 11th International conference on computer vision, IEEE, pp. 1\u20138 (2007)","DOI":"10.1109\/ICCV.2007.4408828"},{"issue":"9","key":"1092_CR9","doi-asserted-by":"publisher","first-page":"3485","DOI":"10.1109\/TIP.2013.2270375","volume":"22","author":"J Konrad","year":"2013","unstructured":"Konrad, J., Wang, M., Ishwar, P., Wu, C., Mukherjee, D.: Learning-based, automatic 2d-to-3d image and video conversion. IEEE Trans. Image Process. 22(9), 3485\u20133496 (2013)","journal-title":"IEEE Trans. Image Process."},{"issue":"1","key":"1092_CR10","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1007\/s11263-006-0031-y","volume":"75","author":"D Hoiem","year":"2007","unstructured":"Hoiem, D., Efros, A.A., Hebert, M.: Recovering surface layout from an image. Int. J. Comput. Vis. 75(1), 151\u2013172 (2007)","journal-title":"Int. J. Comput. Vis."},{"key":"1092_CR11","unstructured":"Chen, R., Mahmood, F., Yuille, A., and Durr, N. J.: Rethinking monocular depth estimation with adversarial training. arXiv preprint. arXiv:1808.07528 (2018)"},{"key":"1092_CR12","doi-asserted-by":"crossref","unstructured":"Silberman, N., Hoiem, D., Kohli, P., Fergus, R.: Indoor segmentation and support inference from rgbd images. In: European conference on computer vision, Springer, pp. 746\u2013760 (2012)","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"1092_CR13","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? the kitti vision benchmark suite. In: 2012 IEEE conference on computer vision and pattern recognition, IEEE, pp. 
3354\u20133361 (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"1092_CR14","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., Schiele, B.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"1092_CR15","unstructured":"Li, B., Shen, C., Dai, Y., Van Den Hengel, A., He, M.: Depth and surface normal estimation from monocular images using regression on deep features and hierarchical crfs. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1119\u20131127 (2015)"},{"issue":"11","key":"1092_CR16","doi-asserted-by":"publisher","first-page":"3174","DOI":"10.1109\/TCSVT.2017.2740321","volume":"28","author":"Y Cao","year":"2017","unstructured":"Cao, Y., Wu, Z., Shen, C.: Estimating depth from monocular images as classification using deep fully convolutional residual networks. IEEE Trans. Circuits Syst. Video Technol. 28(11), 3174\u20133182 (2017)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1092_CR17","unstructured":"Wang, P., Shen, X., Lin, Z., Cohen, S., Price, B., Yuille, A. L.: Towards unified depth and semantic prediction from a single image. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2800\u20132809 (2015)"},{"key":"1092_CR18","doi-asserted-by":"crossref","unstructured":"Xu, D., Ouyang, W., Wang, X., Sebe, N.: Pad-net: Multi-tasks guided prediction-and-distillation network for simultaneous depth estimation and scene parsing. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 
675\u2013684 (2018)","DOI":"10.1109\/CVPR.2018.00077"},{"key":"1092_CR19","doi-asserted-by":"publisher","first-page":"4296","DOI":"10.1109\/TIP.2020.2968250","volume":"29","author":"C Chen","year":"2020","unstructured":"Chen, C., Wei, J., Peng, C., Zhang, W., Qin, H.: Improved saliency detection in rgb-d images using two-phase depth estimation and selective deep fusion. IEEE Trans. Image Process. 29, 4296\u20134307 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"1092_CR20","doi-asserted-by":"crossref","unstructured":"Zhan, H., Garg, R., Weerasekera, C. S., Li, K., Agarwal, H., Reid, I.: Unsupervised learning of monocular depth estimation and visual odometry with deep feature reconstruction. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 340\u2013349 (2018)","DOI":"10.1109\/CVPR.2018.00043"},{"key":"1092_CR21","doi-asserted-by":"crossref","unstructured":"Wang, C., Buenaposada, J. M., Zhu, R., Lucey, S.: Learning depth from monocular videos using direct methods. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2022\u20132030 (2018)","DOI":"10.1109\/CVPR.2018.00216"},{"key":"1092_CR22","doi-asserted-by":"crossref","unstructured":"Garg, R., Bg, V. K., Carneiro, G., Reid, I.: Unsupervised cnn for single view depth estimation: geometry to the rescue. In: European conference on computer vision, Springer, pp. 740\u2013756 (2016)","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"1092_CR23","doi-asserted-by":"crossref","unstructured":"Zhou, T., Krahenbuhl, P., Aubry, M., Huang, Q., Efros, A. A.: Learning dense correspondence via 3d-guided cycle consistency. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 117\u2013126 (2016)","DOI":"10.1109\/CVPR.2016.20"},{"key":"1092_CR24","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. 
In: Advances in neural information processing systems, pp. 2672\u20132680 (2014)"},{"key":"1092_CR25","doi-asserted-by":"crossref","unstructured":"Kundu, J. N., Uppala, P. K., Pahuja, A., Babu, R. V.: Adadepth: unsupervised content congruent adaptation for depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2656\u20132665 (2018)","DOI":"10.1109\/CVPR.2018.00281"},{"key":"1092_CR26","unstructured":"Mirza, M., Osindero, S.: Conditional generative adversarial nets. arXiv preprint. arXiv:1411.1784 (2014)"},{"key":"1092_CR27","doi-asserted-by":"crossref","unstructured":"Zhu, J.-Y., Park, T., Isola, P., Efros, A. A.: Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of the IEEE international conference on computer vision, pp. 2223\u20132232 (2017)","DOI":"10.1109\/ICCV.2017.244"},{"key":"1092_CR28","doi-asserted-by":"crossref","unstructured":"Kumar, A. C. S., Bhandarkar, S. M., Prasad, M.: Monocular depth prediction using generative adversarial networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp. 300\u2013308 (2018)","DOI":"10.1109\/CVPRW.2018.00068"},{"key":"1092_CR29","doi-asserted-by":"crossref","unstructured":"Almalioglu, Y., Saputra, M. R. U., de Gusmao, P. P., Markham, A., Trigoni, N.: Ganvo: unsupervised deep monocular visual odometry and depth estimation with generative adversarial networks. In: 2019 International conference on robotics and automation (ICRA), IEEE, pp. 5474\u20135480 (2019)","DOI":"10.1109\/ICRA.2019.8793512"},{"key":"1092_CR30","doi-asserted-by":"crossref","unstructured":"Hao, Z., Li, Y., You, S., and Lu, F.: Detail preserving depth estimation from a single image using attention guided networks. In: 2018 International conference on 3D vision (3DV), IEEE, pp. 
304\u2013313 (2018)","DOI":"10.1109\/3DV.2018.00043"},{"key":"1092_CR31","unstructured":"Zhang, H., Goodfellow, I., Metaxas, D., and Odena, A.: Self-attention generative adversarial networks. In: International conference on machine learning, pp. 7354\u20137363, PMLR (2019)"},{"key":"1092_CR32","unstructured":"Miyato, T., Kataoka, T., Koyama, M., Yoshida, Y.: Spectral normalization for generative adversarial networks. arXiv preprint. arXiv:1802.05957 (2018)"},{"key":"1092_CR33","unstructured":"Krizhevsky, A., Sutskever, I., and Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems, pp. 1097\u20131105 (2012)"},{"key":"1092_CR34","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X, Ren, S., and Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1092_CR35","doi-asserted-by":"crossref","unstructured":"Xie, S., and Tu, Z.: Holistically-nested edge detection. In: Proceedings of the IEEE international conference on computer vision, pp. 1395\u20131403 (2015)","DOI":"10.1109\/ICCV.2015.164"},{"key":"1092_CR36","unstructured":"Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., Corrado,G. S., Davis, A., Dean, J., Devin, M. et al.: Tensorflow: Large-scale machine learning on heterogeneous distributed systems. arXiv preprint. arXiv:1603.04467 (2016)"},{"key":"1092_CR37","unstructured":"Kingma, D.P., and Ba, J.: Adam: a method for stochastic optimization. arXiv preprint. arXiv:1412.6980 (2014)"},{"issue":"10","key":"1092_CR38","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2016","unstructured":"Liu, F., Shen, C., Lin, G., Reid, I.: Learning depth from single monocular images using deep convolutional neural fields. IEEE Trans. Pattern Anal. Mach. 
Intell. 38(10), 2024\u20132039 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1092_CR39","unstructured":"Xu, D., Ricci, E., Ouyang, W., Wang, X., Sebe, N.: Monocular depth estimation using multi-scale continuous crfs as sequential deep networks. IEEE Trans. Pattern Anal. Mach. Intell. 41(6), 1426\u20131440 (2019)"},{"key":"1092_CR40","doi-asserted-by":"crossref","unstructured":"Zhou, T., Brown, M., Snavely, N., and Lowe, D.G.: Unsupervised learning of depth and ego-motion from video. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1851\u20131858 (2017)","DOI":"10.1109\/CVPR.2017.700"}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-021-01092-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-021-01092-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-021-01092-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,22]],"date-time":"2022-12-22T03:53:45Z","timestamp":1671681225000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-021-01092-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,22]]},"references-count":40,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2021,8]]}},"alternative-id":["1092"],"URL":"https:\/\/doi.org\/10.1007\/s11554-021-01092-0","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"type":"print","value":"1861-8200"},{"type":"electronic","value":"1861-8219"}],"subject":[],"published":{"date-parts":[[2021,3,22]]},"assertion":[{"value":"31 January 
2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 March 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 March 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}