{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T14:32:22Z","timestamp":1775745142564,"version":"3.50.1"},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2021,1,3]],"date-time":"2021-01-03T00:00:00Z","timestamp":1609632000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,3]],"date-time":"2021-01-03T00:00:00Z","timestamp":1609632000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61375007"],"award-info":[{"award-number":["61375007"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Basic Research Programs of Science and Technology Commission Foundation of Shanghai","award":["15JC1400600"],"award-info":[{"award-number":["15JC1400600"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. 
&amp; Cyber."],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s13042-020-01251-y","type":"journal-article","created":{"date-parts":[[2021,1,3]],"date-time":"2021-01-03T13:02:47Z","timestamp":1609678967000},"page":"1583-1596","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":72,"title":["Attention-based context aggregation network for monocular depth estimation"],"prefix":"10.1007","volume":"12","author":[{"given":"Yuru","family":"Chen","sequence":"first","affiliation":[]},{"given":"Haitao","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Zhengwei","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Jingchao","family":"Peng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,1,3]]},"reference":[{"key":"1251_CR1","doi-asserted-by":"crossref","unstructured":"Silberman N, Hoiem D, Kohli P, Fergus R (2012) Indoor segmentation and support inference from rgbd images 7576(1):746\u2013760","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"1251_CR2","doi-asserted-by":"crossref","unstructured":"Simon M, Milz S, Amende K, Gross HM (2018) Complex-yolo: real-time 3d object detection on point clouds","DOI":"10.1109\/CVPRW.2019.00158"},{"key":"1251_CR3","doi-asserted-by":"crossref","unstructured":"Tateno K, Tombari F, Laina I, Navab N (2017) Cnn-slam: real-time dense monocular slam with learned depth prediction. p 6565\u20136574","DOI":"10.1109\/CVPR.2017.695"},{"key":"1251_CR4","doi-asserted-by":"crossref","unstructured":"Laina I, Rupprecht C, Belagiannis V, Tombari F, Navab N (2016) Deeper depth prediction with fully convolutional residual networks. 3D Vision (3DV), 2016 fourth international conference on. p 239\u2013248. 
IEEE","DOI":"10.1109\/3DV.2016.32"},{"issue":"11","key":"1251_CR5","doi-asserted-by":"publisher","first-page":"3145","DOI":"10.1007\/s13042-019-01005-5","volume":"10","author":"S Ghosh","year":"2019","unstructured":"Ghosh S, Pal A, Jaiswal S, Santosh KC, Das N, Nasipuri M (2019) Segfast-v2: Semantic image segmentation with less parameters in deep learning for autonomous driving. Int J Mach Learn Cybern 10(11):3145\u20133154","journal-title":"Int J Mach Learn Cybern"},{"key":"1251_CR6","doi-asserted-by":"crossref","unstructured":"Hirschm\u00fcller H (2005) Accurate and efficient stereo processing by semi-global matching and mutual information. IEEE computer society conference on computer vision and pattern recognition. p 807\u2013814","DOI":"10.1109\/CVPR.2005.56"},{"key":"1251_CR7","doi-asserted-by":"crossref","unstructured":"Roberts R, Sinha SN, Szeliski R, Steedly D (2011) Structure from motion for scenes with large duplicate structures. IEEE conference on computer vision and pattern recognition. p 3137\u20133144","DOI":"10.1109\/CVPR.2011.5995549"},{"key":"1251_CR8","first-page":"2366","volume":"1","author":"D Eigen","year":"2014","unstructured":"Eigen D, Puhrsch C, Fergus R (2014) Depth map prediction from a single image using a multi-scale deep network. Int Conf Neural Inf Process Syst. 1:2366\u20132374","journal-title":"Int Conf Neural Inf Process Syst"},{"key":"1251_CR9","doi-asserted-by":"crossref","unstructured":"Eigen D, Fergus R (2014) Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture. pp. 2650\u20132658","DOI":"10.1109\/ICCV.2015.304"},{"key":"1251_CR10","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-net: convolutional networks for biomedical image segmentation. International conference on medical image computing and computer-assisted intervention. 
p 234\u2013241","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"1251_CR11","doi-asserted-by":"crossref","unstructured":"Newell A, Yang K, Deng J (2016) Stacked hourglass networks for human pose estimation. p 483\u2013499","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"1251_CR12","doi-asserted-by":"crossref","unstructured":"Wei SE, Ramakrishna V, Kanade T, Sheikh Y (2016) Convolutional pose machines. p 4724\u20134732","DOI":"10.1109\/CVPR.2016.511"},{"key":"1251_CR13","unstructured":"Huang J, Lee AB, Mumford D (2000) Statistics of range images. Comput Vis Pattern Recogn. Proceedings IEEE conference on. vol.1. p 324\u2013331"},{"key":"1251_CR14","unstructured":"Yu F, Koltun V (2015) Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122"},{"issue":"4","key":"1251_CR15","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2018","unstructured":"LC Chen, G Papandreou, I Kokkinos, K Murphy, AL Yuille (2018) Deeplab Semantic image segmentation with deep convolutional nets atrous convolution and fully connected. IEEE Trans Pattern Anal Mach Intell 40(4): 834\u2013848","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1251_CR16","unstructured":"Chen LC, Papandreou G, Schroff F, Adam H (2017) Rethinking atrous convolution for semantic image segmentation"},{"key":"1251_CR17","doi-asserted-by":"crossref","unstructured":"Wang P, Chen P, Yuan Y, Liu D, Huang Z, Hou X, Cottrell G (2018) Understanding convolution for semantic segmentation. IEEE winter conference on applications of computer vision. 
p 1451\u20131460","DOI":"10.1109\/WACV.2018.00163"},{"key":"1251_CR18","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need"},{"key":"1251_CR19","doi-asserted-by":"crossref","unstructured":"Wang X, Girshick R, Gupta A, He K (2017) Non-local neural networks","DOI":"10.1109\/CVPR.2018.00813"},{"key":"1251_CR20","unstructured":"Yuan Y, Wang J (2018) Ocnet: Object context network for scene parsing. arXiv preprint arXiv:1809.00916"},{"key":"1251_CR21","unstructured":"Saxena A, Chung SH, Ng AY (2005) Learning depth from single monocular images. International conference on neural information processing systems. p 1161\u20131168"},{"key":"1251_CR22","doi-asserted-by":"crossref","unstructured":"Saxena A, Sun M, Ng AY (2007) Learning 3-d scene structure from a single still image. IEEE international conference on computer vision. p 1\u20138","DOI":"10.1109\/ICCV.2007.4408828"},{"key":"1251_CR23","doi-asserted-by":"crossref","unstructured":"Liu B, Gould S, Koller D (2010) Single image depth estimation from predicted semantic labels. Comput Vis Pattern Recogn. p 1253\u20131260","DOI":"10.1109\/CVPR.2010.5539823"},{"key":"1251_CR24","doi-asserted-by":"crossref","unstructured":"Ladicky L, Shi J, Pollefeys M (2014) Pulling things out of perspective. IEEE Conf Comput Vis Pattern Recogn 9:89\u201396","DOI":"10.1109\/CVPR.2014.19"},{"key":"1251_CR25","unstructured":"Junjie H, Ozay M, Zhang Y, Okatani T (2018) Toward higher resolution maps with accurate object boundaries, revisiting single image depth estimation"},{"key":"1251_CR26","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1016\/j.neucom.2017.08.074","volume":"280","author":"H Yan","year":"2018","unstructured":"Han Yan, Shunli Zhang, Yu Zhang, and Li Zhang. Monocular depth estimation with guidance of surface normal map. 
Neurocomputing, 280:86\u2013100, 2018","journal-title":"Neurocomputing"},{"issue":"3","key":"1251_CR27","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1007\/s13042-019-01020-6","volume":"11","author":"Junning Zhang","year":"2020","unstructured":"Junning Zhang, Qunxing Su, Pengyuan Liu, Chao Xu, and Yanlong Chen. Unsupervised learning of monocular depth and ego-motion with space\u2013temporal-centroid loss. International Journal of Machine Learning and Cybernetics, 11(3), 615\u2013627, 2020","journal-title":"Int J Mach Learn Cybern"},{"key":"1251_CR28","doi-asserted-by":"crossref","unstructured":"Roy A, Todorovic S (2016) Monocular depth estimation using neural regression forest. Comput Vis Pattern Recogn. p 5506\u20135514","DOI":"10.1109\/CVPR.2016.594"},{"key":"1251_CR29","unstructured":"Zwald L, Lambertlacroix S (2012) The berhu penalty and the grouped effect. Statistics"},{"key":"1251_CR30","doi-asserted-by":"crossref","unstructured":"Garg R, Vijay Kumar BG, Carneiro G, Reid I (2016) Unsupervised cnn for single view depth estimation: Geometry to the rescue. European conference on computer vision. p 740\u2013756","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"1251_CR31","first-page":"6602","volume":"1","author":"C Godard","year":"2017","unstructured":"Godard C, Aodha OM, Brostow GJ (2017) Unsupervised monocular depth estimation with left-right consistency. Comput Vis Pattern Recogn. 1:6602\u20136611","journal-title":"Comput Vis Pattern Recogn"},{"issue":"4","key":"1251_CR32","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z, Bovik AC, Sheikh HR, Simoncelli EP (2004) Image quality assessment: from error visibility to structural similarity. 
IEEE Trans Image Process 13(4):600\u2013612","journal-title":"IEEE Trans Image Process"},{"key":"1251_CR33","doi-asserted-by":"crossref","unstructured":"Heise P, Klose S, Jensen B, Knoll A (2014) Pm-huber: Patchmatch with huber regularization for stereo matching. IEEE international conference on computer vision. p 2360\u20132367","DOI":"10.1109\/ICCV.2013.293"},{"issue":"1\u20133","key":"1251_CR34","first-page":"3","volume":"125","author":"Saining Xie","year":"2015","unstructured":"Saining Xie and Zhuowen Tu. Holistically-nested edge detection. International Journal of Computer Vision, 125(1\u20133), 3\u201318, 2015","journal-title":"Int J Comput Vis"},{"key":"1251_CR35","doi-asserted-by":"crossref","unstructured":"Yu F, Koltun V, Funkhouser T (2017) Dilated residual networks. p 636\u2013644","DOI":"10.1109\/CVPR.2017.75"},{"key":"1251_CR36","doi-asserted-by":"crossref","unstructured":"Kim Y, Jung H, Min D, Sohn K (2018) Deep monocular depth estimation via integration of global and local predictions. IEEE Trans Image Process Publ IEEE Sig Process Soc 99:1\u20131","DOI":"10.1109\/TIP.2018.2836318"},{"key":"1251_CR37","doi-asserted-by":"crossref","unstructured":"Xu D, Ricci E, Ouyang W, Wang X, Sebe N (2017) Multi-scale continuous crfs as sequential deep networks for monocular depth estimation. p 161\u2013169","DOI":"10.1109\/CVPR.2017.25"},{"key":"1251_CR38","doi-asserted-by":"crossref","unstructured":"Liu F, Shen C, Lin G (2015) Deep convolutional neural fields for depth estimation from a single image. IEEE conference on computer vision and pattern recognition. p 5162\u20135170","DOI":"10.1109\/CVPR.2015.7299152"},{"key":"1251_CR39","unstructured":"Li B, Shen C, Dai Y, Van Den Hengel A, He M (2015) Depth and surface normal estimation from monocular images using regression on deep features and hierarchical crfs. Comput Vis Pattern Recogn. 
p 1119\u20131127"},{"issue":"10","key":"1251_CR40","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2015","unstructured":"F. Liu, C. Shen, G. Lin, and I Reid. Learning depth from single monocular images using deep convolutional neural fields. IEEE Transactions on Pattern Analysis & Machine Intelligence, 38(10), 2024\u20132039, 2015","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1251_CR41","first-page":"1","volume":"99","author":"Z Zhang","year":"2018","unstructured":"Zhang Z, Xu C, Yang J, Gao J, Cui Z (2018) Progressive hard-mining network for monocular depth estimation. IEEE Trans Image Process. 99:1\u20131","journal-title":"IEEE Trans Image Process"},{"key":"1251_CR42","doi-asserted-by":"crossref","unstructured":"Li B, Dai Y, He M (2018) Monocular depth estimation with hierarchical fusion of dilated cnns and soft-weighted-sum inference. Pattern Recogn","DOI":"10.1016\/j.patcog.2018.05.029"},{"key":"1251_CR43","doi-asserted-by":"crossref","unstructured":"Moukari M, Picard S, Simon L, Jurie F (2018) Deep multi-scale architectures for monocular depth estimation. arXiv preprint arXiv:1806.03051","DOI":"10.1109\/ICIP.2018.8451408"},{"key":"1251_CR44","doi-asserted-by":"crossref","unstructured":"Fu H, Gong M, Wang C, Batmanghelich K, Tao D (2018) Deep ordinal regression network for monocular depth estimation. Proceedings of the IEEE conference on computer vision and pattern recognition. p. 2002\u20132011","DOI":"10.1109\/CVPR.2018.00214"},{"key":"1251_CR45","doi-asserted-by":"crossref","unstructured":"Zheng S, Jayasumana S, Romera-Paredes B, Vineet V, Su Z, Du D, Huang C, Torr PHS (2015) Conditional random fields as recurrent neural networks. 
p 1529\u20131537","DOI":"10.1109\/ICCV.2015.179"},{"key":"1251_CR46","doi-asserted-by":"crossref","unstructured":"Lin G, Shen C, Reid I, Van Dan Hengel A (2015) Efficient piecewise training of deep structured models for semantic segmentation. p 3194\u20133203","DOI":"10.1109\/CVPR.2016.348"},{"key":"1251_CR47","first-page":"1","volume":"99","author":"Y Cao","year":"2017","unstructured":"Cao Y, Wu Zi, Shen C (2017) Estimating depth from monocular images as classification using deep fully convolutional residual networks. IEEE Trans Circ Syst Video Technol. 99:1\u20131","journal-title":"IEEE Trans Circ Syst Video Technol."},{"key":"1251_CR48","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. Proceedings of the IEEE conference on computer vision and pattern recognition. p. 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"issue":"4","key":"1251_CR49","doi-asserted-by":"publisher","first-page":"873","DOI":"10.1007\/s13042-020-01063-0","volume":"11","author":"T Zia","year":"2020","unstructured":"Zia T, Abbas A, Habib U, Khan MS (2020) Learning deep hierarchical and temporal recurrent neural networks with residual learning. Int J Mach Learn Cybern 11(4):873\u2013882","journal-title":"Int J Mach Learn Cybern"},{"key":"1251_CR50","doi-asserted-by":"crossref","unstructured":"Zhou B, Khosla A, Lapedriza A, Oliva A, Torralba A (2015) Learning deep features for discriminative localization. p 2921\u20132929","DOI":"10.1109\/CVPR.2016.319"},{"key":"1251_CR51","unstructured":"Liu W, Rabinovich A, Berg AC (2015) Parsenet: Looking wider to see better. 
arXiv preprint arXiv:1506.04579"},{"key":"1251_CR52","doi-asserted-by":"crossref","unstructured":"Li R, Xian K, Shen C, Cao Z, Lu H, Hang L (2018) Deep attention-based classification network for robust depth prediction","DOI":"10.1007\/978-3-030-20870-7_41"},{"key":"1251_CR53","doi-asserted-by":"crossref","unstructured":"Niu Z, Zhou M, Wang L, Gao X, Hua G (2016) Ordinal regression with multiple output cnn for age estimation. The IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2016.532"},{"key":"1251_CR54","doi-asserted-by":"crossref","unstructured":"Geiger A (2012) Are we ready for autonomous driving? the kitti vision benchmark suite. IEEE conference on computer vision and pattern recognition. p 3354\u20133361","DOI":"10.1109\/CVPR.2012.6248074"},{"issue":"3","key":"1251_CR55","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"Olga Russakovsky","year":"2015","unstructured":"Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, and Michael Bernstein. Imagenet large scale visual recognition challenge. 
International Journal of Computer Vision, 115(3), 211\u2013252, 2015","journal-title":"Int J Comput Vis"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-020-01251-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-020-01251-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-020-01251-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T16:22:15Z","timestamp":1724170935000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-020-01251-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,3]]},"references-count":55,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["1251"],"URL":"https:\/\/doi.org\/10.1007\/s13042-020-01251-y","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,1,3]]},"assertion":[{"value":"1 July 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 November 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 January 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}