{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T14:05:26Z","timestamp":1778335526122,"version":"3.51.4"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2014,11,21]],"date-time":"2014-11-21T00:00:00Z","timestamp":1416528000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2015,4]]},"DOI":"10.1007\/s11263-014-0777-6","type":"journal-article","created":{"date-parts":[[2014,11,21]],"date-time":"2014-11-21T17:02:15Z","timestamp":1416589335000},"page":"133-149","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":187,"title":["Indoor Scene Understanding with RGB-D Images: Bottom-up Segmentation, Object Detection and Semantic Segmentation"],"prefix":"10.1007","volume":"112","author":[{"given":"Saurabh","family":"Gupta","sequence":"first","affiliation":[]},{"given":"Pablo","family":"Arbel\u00e1ez","sequence":"additional","affiliation":[]},{"given":"Ross","family":"Girshick","sequence":"additional","affiliation":[]},{"given":"Jitendra","family":"Malik","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,11,21]]},"reference":[{"key":"777_CR1","doi-asserted-by":"crossref","unstructured":"Arbelaez, P., Hariharan, B., Gu, C., Gupta, S., Bourdev, L., & Malik, J. (2012). Semantic segmentation using regions and parts. In CVPR.","DOI":"10.1109\/CVPR.2012.6248077"},{"key":"777_CR2","doi-asserted-by":"crossref","unstructured":"Arbelaez, P., Maire, M., Fowlkes, C., & Malik, J. (2011). Contour detection and hierarchical image segmentation. In TPAMI.","DOI":"10.1109\/TPAMI.2010.161"},{"key":"777_CR3","doi-asserted-by":"crossref","unstructured":"Barron, J. T., Malik, J. (2013). Intrinsic scene properties from a single RGB-D image. In CVPR.","DOI":"10.1109\/CVPR.2013.10"},{"key":"777_CR4","doi-asserted-by":"crossref","unstructured":"Bourdev, L., Maji, S., Brox, T., & Malik, J. (2010). Detecting people using mutually consistent poselet activations. In ECCV.","DOI":"10.1007\/978-3-642-15567-3_13"},{"key":"777_CR5","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L. (2001). Random forests. Machine Learning, 45, 5\u201332.","journal-title":"Machine Learning"},{"key":"777_CR6","doi-asserted-by":"crossref","unstructured":"Carreira, J., Caseiro, R., Batista, J., & Sminchisescu, C. (2012). Semantic segmentation with second-order pooling. In ECCV.","DOI":"10.1007\/978-3-642-33786-4_32"},{"key":"777_CR7","doi-asserted-by":"crossref","unstructured":"Carreira, J., Li, F., & Sminchisescu, C. (2012). Object recognition by sequential figure-ground ranking. In IJCV.","DOI":"10.1007\/s11263-011-0507-2"},{"key":"777_CR8","volume-title":"ECCV","author":"J Carreira","year":"2012","unstructured":"Carreira, J., Caseiro, R., Batista, J., & Sminchisescu, C. (2012). Semantic segmentation with second-order pooling. ECCV. Berlin Heidelberg: Springer."},{"key":"777_CR9","unstructured":"Criminisi, A., Shotton, J., & Konukoglu, E. (2012). Decision forests: A unified framework for classification, regression, density estimation, manifold learning and semi-supervised learning. Graphics and Vision: Found and Trends in Comp."},{"key":"777_CR10","doi-asserted-by":"crossref","unstructured":"Dalal, N., & Triggs, B.(2005). Histograms of oriented gradients for human detection. In CVPR.","DOI":"10.1109\/CVPR.2005.177"},{"key":"777_CR11","unstructured":"Deng, J., Dong, W., Socher, R., Li, L. -J., Li, K., & Fei-Fei, L. (2009). Image{N}et: A large-scale hierarchical image database. In CVPR."},{"key":"777_CR12","doi-asserted-by":"crossref","unstructured":"Doll\u00e1r, P., Zitnick, C. L. (2013). Structured forests for fast edge detection. In ICCV.","DOI":"10.1109\/ICCV.2013.231"},{"key":"777_CR13","doi-asserted-by":"crossref","unstructured":"Endres, I., Shih, K. J., Jiaa, J., & Hoiem, D. (2013). Learning collections of part models for object recognition. In CVPR.","DOI":"10.1109\/CVPR.2013.126"},{"key":"777_CR14","doi-asserted-by":"crossref","unstructured":"Everingham, M., Van Gool, L., Williams, C. K. I., Winn, J., & Zisserman, A. (2010). The PASCAL Visual Object Classes (VOC) Challenge. In IJCV.","DOI":"10.1007\/s11263-009-0275-4"},{"key":"777_CR15","unstructured":"Everingham, M., Van Gool, L., Williams, C.K.I., Winn, J., & Zisserman, A.(2012). The PASCAL Visual Object Classes Challenge (VOC2012) Results. http:\/\/www.pascal-network.org\/challenges\/VOC\/voc2012\/workshop\/index.html"},{"key":"777_CR16","doi-asserted-by":"crossref","unstructured":"Felzenszwalb, P., Girshick, R., McAllester, D., & Ramanan, D. (2010). Object detection with discriminatively trained part based models. In TPAMI.","DOI":"10.1109\/TPAMI.2009.167"},{"key":"777_CR17","doi-asserted-by":"crossref","unstructured":"Frome, A., Huber, D., Kolluri, R., B\u00fclow, T., & Malik, J. (2004). Recognizing objects in range data using regional point descriptors. In ECCV.","DOI":"10.1007\/978-3-540-24672-5_18"},{"key":"777_CR18","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., & Malik, J. (2014). Rich feature hierarchies for accurate object detection and semantic segmentation. In CVPR.","DOI":"10.1109\/CVPR.2014.81"},{"key":"777_CR19","doi-asserted-by":"crossref","unstructured":"Gupta, S., Arbelaez, P., & Malik, J. (2013). Perceptual organization and recognition of indoor scenes from RGB-D images. In CVPR.","DOI":"10.1109\/CVPR.2013.79"},{"key":"777_CR20","doi-asserted-by":"crossref","unstructured":"Gupta, A., Efros, A., & Hebert, M. (2010). Blocks world revisited: Image understanding using qualitative geometry and mechanics. In ECCV.","DOI":"10.1007\/978-3-642-15561-1_35"},{"key":"777_CR21","doi-asserted-by":"crossref","unstructured":"Gupta, S., Girshick, R., Arbel\u00e1ez, P., & Malik, J. (2014). Learning rich features from RGB-D images for Object detection and segmentation. In ECCV.","DOI":"10.1007\/978-3-319-10584-0_23"},{"key":"777_CR22","doi-asserted-by":"crossref","unstructured":"Gupta, A., Satkin, S., Efros, A., & Hebert, M. (2011). From 3D scene geometry to human workspace. In CVPR.","DOI":"10.1109\/CVPR.2011.5995448"},{"key":"777_CR23","doi-asserted-by":"crossref","unstructured":"Hedau, V., Hoiem, D., & Forsyth, D. (2012). Recovering free space of indoor scenes from a single image. In CVPR.","DOI":"10.1109\/CVPR.2012.6248005"},{"key":"777_CR24","doi-asserted-by":"crossref","unstructured":"Hoiem, D., Efros, A., & Hebert, M. (2007). Recovering surface layout from an image. In IJCV.","DOI":"10.1007\/s11263-006-0031-y"},{"key":"777_CR25","doi-asserted-by":"crossref","unstructured":"Hoiem, D., Efros, A., & Hebert, M. (2011). Recovering occlusion boundaries from an image. In IJCV.","DOI":"10.1007\/s11263-010-0400-4"},{"key":"777_CR26","doi-asserted-by":"crossref","unstructured":"Izadi, S., Kim, D., Hilliges, O., Molyneaux, D., Newcombe, R., Kohli, P., Shotton, J., Hodges, S., Freeman, D., Davison, A., & Fitzgibbon, A. (2011). KinectFusion: Real-time 3D reconstruction and interaction using a moving depth camera. In UIST.","DOI":"10.1145\/2047196.2047270"},{"key":"777_CR27","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1007\/978-1-4471-4640-7_8","volume-title":"Consumer Depth Cameras for Computer Vision","author":"A Janoch","year":"2013","unstructured":"Janoch, A., Karayev, S., Jia, Y., Barron, J. T., Fritz, M., Saenko, K., et al. (2013). A category-level 3D object dataset: Putting the kinect to work. Consumer Depth Cameras for Computer Vision (pp. 141\u2013165). Berlin: Springer."},{"key":"777_CR28","doi-asserted-by":"crossref","unstructured":"Johnson, A., Hebert, M. (1999). Using spin images for efficient object recognition in cluttered 3D scenes. In TPAMI.","DOI":"10.1109\/34.765655"},{"key":"777_CR29","volume-title":"Organization in Vision: Essays on Gestalt Perception","author":"G Kanizsa","year":"1979","unstructured":"Kanizsa, G. (1979). Organization in Vision: Essays on Gestalt Perception. New York: Praeger Publishers."},{"key":"777_CR30","unstructured":"Koppula, H., Anand, A., Joachims, T., & Saxena, A. (2011). Semantic labeling of 3d point clouds for indoor scenes. In NIPS."},{"key":"777_CR31","doi-asserted-by":"crossref","unstructured":"Ladicky, L., Russell, C., Kohli, P., & Torr, P. H. S. (2010). Graph cut based inference with co-occurrence statistics. In ECCV.","DOI":"10.1007\/978-3-642-15555-0_18"},{"key":"777_CR32","doi-asserted-by":"crossref","unstructured":"Lai, K., Bo, L., Ren, X., & Fox, D. (2011). A large-scale hierarchical multi-view RGB-D object dataset. In ICRA.","DOI":"10.1109\/ICRA.2011.5980382"},{"key":"777_CR33","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1007\/978-1-4471-4640-7_9","volume-title":"Consumer Depth Cameras for Computer Vision: Research Topics and Applications","author":"K Lai","year":"2013","unstructured":"Lai, K., Bo, L., Ren, X., & Fox, D. (2013). RGB-D object recognition: Features, algorithms, and a large scale benchmark. In A. Fossati, J. Gall, H. Grabner, X. Ren, & K. Konolige (Eds.), Consumer Depth Cameras for Computer Vision: Research Topics and Applications (pp. 167\u2013192). Berlin: Springer."},{"key":"777_CR34","doi-asserted-by":"crossref","unstructured":"Lazebnik, S., Schmid, C., & Ponce, J. (2006). Beyond bags of features: Spatial pyramid matching for recognizing natural scene categories. In CVPR.","DOI":"10.1109\/CVPR.2006.68"},{"key":"777_CR35","unstructured":"Lee, D., Gupta, A., Hebert, M., & Kanade, T. (2010). Estimating spatial layout of rooms using volumetric reasoning about objects and surfaces. In NIPS."},{"key":"777_CR36","doi-asserted-by":"crossref","unstructured":"Lee, D., Hebert, M., & Kanade, T. (2009). Geometric reasoning for single image structure recovery. In CVPR.","DOI":"10.1109\/CVPR.2009.5206872"},{"key":"777_CR37","doi-asserted-by":"crossref","unstructured":"Maji, S., Berg, A. C., & Malik, J. (2013). Efficient classification for additive kernel svms. In TPAMI.","DOI":"10.1109\/TPAMI.2012.62"},{"key":"777_CR38","doi-asserted-by":"crossref","unstructured":"Martin, D., Fowlkes, C., & Malik, J. (2004). Learning to detect natural image boundaries using local brightness, color and texture cues. In TPAMI.","DOI":"10.1109\/TPAMI.2004.1273918"},{"key":"777_CR39","unstructured":"Reconstruction meets recognition challenge, iccv 2013. http:\/\/ttic.uchicago.edu\/~rurtasun\/rmrc\/index.php (2013)"},{"key":"777_CR40","unstructured":"Ren, X., & Bo, L. (2012). Discriminatively trained sparse code gradients for contour detection. In NIPS."},{"key":"777_CR41","unstructured":"Ren, X., Bo, L., & Fox, D.(2012). RGB-(D) scene labeling: Features and algorithms. In CVPR."},{"key":"777_CR42","doi-asserted-by":"crossref","unstructured":"Rusu, R. B., Blodow, N., & Beetz, M. (2009). Fast point feature histograms (FPFH) for 3D registration. In ICRA.","DOI":"10.1109\/ROBOT.2009.5152473"},{"issue":"8","key":"777_CR43","doi-asserted-by":"crossref","first-page":"1627","DOI":"10.1021\/ac60214a047","volume":"36","author":"A Savitsky","year":"1964","unstructured":"Savitsky, A., & Golay, M. (1964). Smoothing and differentiation of data by simplified least squares procedures. Analytical Chemistry, 36(8), 1627\u20131639.","journal-title":"Analytical Chemistry"},{"key":"777_CR44","doi-asserted-by":"crossref","unstructured":"Saxena, A., Chung, S., & Ng, A. (2008). 3-D depth reconstruction from a single still image. In IJCV.","DOI":"10.1007\/s11263-007-0071-y"},{"key":"777_CR45","doi-asserted-by":"crossref","unstructured":"Shotton, J., Fitzgibbon, A. W., Cook, M., Sharp, T., Finocchio, M., Moore, R., Kipman, A., & Blake, A. (2011). Real-time human pose recognition in parts from single depth images. In CVPR.","DOI":"10.1109\/CVPR.2011.5995316"},{"key":"777_CR46","doi-asserted-by":"crossref","unstructured":"Silberman, N., Hoiem, D., Kohli, P., & Fergus, R. (2012). Indoor segmentation and support inference from RGBD images. In ECCV.","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"777_CR47","doi-asserted-by":"crossref","unstructured":"soo Kim, B., Xu, S., & Savarese, S.(2013) Accurate localization of 3D objects from RGB-D data using segmentation hypotheses. In CVPR.","DOI":"10.1109\/CVPR.2013.409"},{"key":"777_CR48","unstructured":"Tang, S., Wang, X., Lv, X., Han, T.X., Keller, J., He, Z., Skubic, M., & Lao, S. (2012). Histogram of oriented normal vectors for object recognition with a depth sensor. In ACCV."},{"key":"777_CR49","doi-asserted-by":"crossref","unstructured":"van de Sande, K. E. A., Gevers, T., & Snoek, C. G. M. (2010). Evaluating color descriptors for object and scene recognition. In TPAMI.","DOI":"10.1109\/TPAMI.2009.154"},{"key":"777_CR50","doi-asserted-by":"crossref","unstructured":"Viola, P., & Jones, M.(2001). Rapid object detection using a boosted cascade of simple features. In CVPR.","DOI":"10.1109\/CVPR.2001.990517"},{"key":"777_CR51","unstructured":"Ye, E.S.(2013). Object detection in RGB-D indoor scenes. Master\u2019s thesis, EECS Department, University of California, Berkeley."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-014-0777-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-014-0777-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-014-0777-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,23]],"date-time":"2022-04-23T02:42:32Z","timestamp":1650681752000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-014-0777-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,11,21]]},"references-count":51,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2015,4]]}},"alternative-id":["777"],"URL":"https:\/\/doi.org\/10.1007\/s11263-014-0777-6","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,11,21]]}}}