{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T16:46:42Z","timestamp":1778258802378,"version":"3.51.4"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319105833","type":"print"},{"value":"9783319105840","type":"electronic"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-10584-0_23","type":"book-chapter","created":{"date-parts":[[2014,8,14]],"date-time":"2014-08-14T11:40:08Z","timestamp":1408016408000},"page":"345-360","source":"Crossref","is-referenced-by-count":748,"title":["Learning Rich Features from RGB-D Images for Object Detection and Segmentation"],"prefix":"10.1007","author":[{"given":"Saurabh","family":"Gupta","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ross","family":"Girshick","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pablo","family":"Arbel\u00e1ez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jitendra","family":"Malik","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"23_CR1","doi-asserted-by":"crossref","unstructured":"Arbel\u00e1ez, P., Pont-Tuset, J., Barron, J., Marques, F., Malik, J.: Multiscale combinatorial grouping. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.49"},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Arbel\u00e1ez, P., Maire, M., Fowlkes, C., Malik, J.: Contour detection and hierarchical image segmentation. TPAMI (2011)","DOI":"10.1109\/TPAMI.2010.161"},{"key":"23_CR3","unstructured":"Banica, D., Sminchisescu, C.: CPMC-3D-O2P: Semantic segmentation of RGB-D images using CPMC and second order pooling. CoRR abs\/1312.7715 (2013)"},{"key":"23_CR4","doi-asserted-by":"crossref","unstructured":"Bo, L., Ren, X., Fox, D.: Unsupervised Feature Learning for RGB-D Based Object Recognition. In: ISER (2012)","DOI":"10.1007\/978-3-319-00065-7_27"},{"key":"23_CR5","unstructured":"Breiman, L.: Random forests. Machine Learning (2001)"},{"key":"23_CR6","unstructured":"Couprie, C., Farabet, C., Najman, L., LeCun, Y.: Indoor semantic segmentation using depth information. CoRR abs\/1301.3572 (2013)"},{"key":"23_CR7","unstructured":"Deng, J., Berg, A., Satheesh, S., Su, H., Khosla, A., Fei-Fei, L.: ImageNet Large Scale Visual Recognition Competition 2012 (ILSVRC 2012) (2012), \n                    \n                      http:\/\/www.image-net.org\/challenges\/LSVRC\/2012\/"},{"key":"23_CR8","unstructured":"Doll\u00e1r, P.: Piotr\u2019s Image and Video Matlab Toolbox (PMT), \n                    \n                      http:\/\/vision.ucsd.edu\/~pdollar\/toolbox\/doc\/index.html"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Doll\u00e1r, P., Zitnick, C.L.: Structured forests for fast edge detection. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.231"},{"key":"23_CR10","unstructured":"Doll\u00e1r, P., Zitnick, C.L.: Fast edge detection using structured forests. CoRR abs\/1406.5549 (2014)"},{"key":"23_CR11","unstructured":"Donahue, J., Jia, Y., Vinyals, O., Hoffman, J., Zhang, N., Tzeng, E., Darrell, T.: Decaf: A deep convolutional activation feature for generic visual recognition. In: ICML (2014)"},{"key":"23_CR12","unstructured":"Fan, R.E., Chang, K.W., Hsieh, C.J., Wang, X.R., Lin, C.J.: LIBLINEAR: A library for large linear classification. JMRL (2008)"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Farabet, C., Couprie, C., Najman, L., LeCun, Y.: Learning hierarchical features for scene labeling. TPAMI (2013)","DOI":"10.1109\/TPAMI.2012.231"},{"key":"23_CR14","doi-asserted-by":"crossref","unstructured":"Felzenszwalb, P., Girshick, R., McAllester, D., Ramanan, D.: Object detection with discriminatively trained part based models. TPAMI (2010)","DOI":"10.1109\/TPAMI.2009.167"},{"key":"23_CR15","unstructured":"Geman, D., Amit, Y., Wilder, K.: Joint induction of shape features and tree classifiers. TPAMI (1997)"},{"key":"23_CR16","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"23_CR17","doi-asserted-by":"crossref","unstructured":"Guo, R., Hoiem, D.: Support surface prediction in indoor scenes. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.266"},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"Gupta, S., Arbel\u00e1ez, P., Malik, J.: Perceptual organization and recognition of indoor scenes from RGB-D images. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.79"},{"key":"23_CR19","series-title":"LNCS","volume-title":"ECCV 2014, Part VII","author":"B. Hariharan","year":"2014","unstructured":"Hariharan, B., Arbel\u00e1ez, P., Girshick, R., Malik, J.: Simultaneous detection and segmentation. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part VII. LNCS, vol.\u00a08695, Springer, Heidelberg (2014)"},{"key":"23_CR20","doi-asserted-by":"crossref","unstructured":"Janoch, A., Karayev, S., Jia, Y., Barron, J.T., Fritz, M., Saenko, K., Darrell, T.: A category-level 3D object dataset: Putting the kinect to work. In: Consumer Depth Cameras for Computer Vision (2013)","DOI":"10.1007\/978-1-4471-4640-7_8"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Jia, Y.: Caffe: An open source convolutional architecture for fast feature embedding (2013), \n                    \n                      http:\/\/caffe.berkeleyvision.org\/","DOI":"10.1145\/2647868.2654889"},{"key":"23_CR22","doi-asserted-by":"crossref","unstructured":"Soo Kim, B., Xu, S., Savarese, S.: Accurate localization of 3D objects from RGB-D data using segmentation hypotheses. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.409"},{"key":"23_CR23","unstructured":"Koppula, H., Anand, A., Joachims, T., Saxena, A.: Semantic labeling of 3D point clouds for indoor scenes. In: NIPS (2011)"},{"key":"23_CR24","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: NIPS (2012)"},{"key":"23_CR25","doi-asserted-by":"crossref","unstructured":"Lai, K., Bo, L., Ren, X., Fox, D.: A large-scale hierarchical multi-view rgb-d object dataset. In: ICRA (2011)","DOI":"10.1109\/ICRA.2011.5980382"},{"key":"23_CR26","doi-asserted-by":"crossref","unstructured":"LeCun, Y., Boser, B., Denker, J.S., Henderson, D., Howard, R.E., Hubbard, W., Jackel, L.D.: Backpropagation applied to handwritten zip code recognition. Neural Computation (1989)","DOI":"10.1162\/neco.1989.1.4.541"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Lim, J.J., Zitnick, C.L., Doll\u00e1r, P.: Sketch tokens: A learned mid-level representation for contour and object detection. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.406"},{"key":"23_CR28","doi-asserted-by":"crossref","unstructured":"Lin, D., Fidler, S., Urtasun, R.: Holistic scene understanding for 3D object detection with RGBD cameras. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.179"},{"key":"23_CR29","unstructured":"Ren, X., Bo, L.: Discriminatively trained sparse code gradients for contour detection. In: NIPS (2012)"},{"key":"23_CR30","unstructured":"Ren, X., Bo, L., Fox, D.: RGB-(D) scene labeling: Features and algorithms. In: CVPR (2012)"},{"key":"23_CR31","doi-asserted-by":"crossref","unstructured":"Shotton, J., Fitzgibbon, A.W., Cook, M., Sharp, T., Finocchio, M., Moore, R., Kipman, A., Blake, A.: Real-time human pose recognition in parts from single depth images. In: CVPR (2011)","DOI":"10.1109\/CVPR.2011.5995316"},{"key":"23_CR32","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Gupta, A.: Building part-based object detectors via 3D geometry. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.219"},{"key":"23_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"746","DOI":"10.1007\/978-3-642-33715-4_54","volume-title":"Computer Vision \u2013 ECCV 2012","author":"N. Silberman","year":"2012","unstructured":"Silberman, N., Hoiem, D., Kohli, P., Fergus, R.: Indoor segmentation and support inference from RGBD images. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012, Part V. LNCS, vol.\u00a07576, pp. 746\u2013760. Springer, Heidelberg (2012)"},{"key":"23_CR34","unstructured":"Socher, R., Huval, B., Bath, B.P., Manning, C.D., Ng, A.Y.: Convolutional-recursive deep learning for 3D object classification. In: NIPS (2012)"},{"key":"23_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1007\/978-3-642-37444-9_41","volume-title":"Computer Vision \u2013 ACCV 2012","author":"S. Tang","year":"2013","unstructured":"Tang, S., Wang, X., Lv, X., Han, T.X., Keller, J., He, Z., Skubic, M., Lao, S.: Histogram of oriented normal vectors for object recognition with a depth sensor. In: Lee, K.M., Matsushita, Y., Rehg, J.M., Hu, Z. (eds.) ACCV 2012, Part II. LNCS, vol.\u00a07725, pp. 525\u2013538. Springer, Heidelberg (2013)"},{"key":"23_CR36","doi-asserted-by":"crossref","unstructured":"Tighe, J., Niethammer, M., Lazebnik, S.: Scene parsing with object instances and occlusion ordering. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.479"},{"key":"23_CR37","doi-asserted-by":"crossref","unstructured":"Wang, T., He, X., Barnes, N.: Learning structured hough voting for joint object detection and occlusion reasoning. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.234"},{"key":"23_CR38","unstructured":"Ye, E.S.: Object Detection in RGB-D Indoor Scenes. Master\u2019s thesis, EECS Department, University of California, Berkeley (January 2013), \n                    \n                      http:\/\/www.eecs.berkeley.edu\/Pubs\/TechRpts\/2013\/EECS-2013-3.html"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2014"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-10584-0_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,12,2]],"date-time":"2019-12-02T09:48:17Z","timestamp":1575280097000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-10584-0_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319105833","9783319105840"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-10584-0_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014]]}}}