{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T17:52:30Z","timestamp":1760205150150},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2021,8,2]],"date-time":"2021-08-02T00:00:00Z","timestamp":1627862400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,8,2]],"date-time":"2021-08-02T00:00:00Z","timestamp":1627862400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sign Process Syst"],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1007\/s11265-021-01677-9","type":"journal-article","created":{"date-parts":[[2021,8,2]],"date-time":"2021-08-02T16:04:48Z","timestamp":1627920288000},"page":"1219-1233","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Object Detection in RGB-D Images via Anchor Box with Multi-Reduced Region Proposal Network and Multi-Pooling"],"prefix":"10.1007","volume":"93","author":[{"given":"Jiou-Ai","family":"Lin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ching-Te","family":"Chiu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yen-Yu","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,8,2]]},"reference":[{"key":"1677_CR1","doi-asserted-by":"crossref","unstructured":"Arbel\u00e1ez, P., Pont-Tuset, J., Barron, J. T., Marques, F., & Malik, J. (2014). Multiscale combinatorial grouping. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 328\u2013335).","DOI":"10.1109\/CVPR.2014.49"},{"key":"1677_CR2","doi-asserted-by":"crossref","unstructured":"Armesto, L., & Tornero, J. (2009). Automation of industrial vehicles A vision-based line tracking application. In 2009 IEEE Conference on emerging technologies & factory automation (pp. 1\u20137). IEEE.","DOI":"10.1109\/ETFA.2009.5347051"},{"issue":"3","key":"1677_CR3","first-page":"273","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes, C., & Vapnik, V. (1995). Support-vector networks. Machine Learning, 20(3), 273\u2013297.","journal-title":"Machine Learning"},{"key":"1677_CR4","doi-asserted-by":"crossref","unstructured":"Dalal, N., & Triggs, B. (2005). Histograms of oriented gradients for human detection. In IEEE computer society conference on computer vision and pattern recognition, 2005. CVPR 2005, (Vol. 1 pp. 886\u2013893). IEEE.","DOI":"10.1109\/CVPR.2005.177"},{"key":"1677_CR5","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L. -J., Li, K., & Fei-Fei, L. (2009). Imagenet: A large-scale hierarchical image database. In IEEE conference on computer vision and pattern recognition, 2009. CVPR 2009 (pp. 248\u2013255). IEEE.","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"2","key":"1677_CR6","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C. K., Winn, J., & Zisserman, A. (2010). The pascal visual object classes (voc) challenge. International Journal of Computer Vision, 88(2), 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"issue":"1","key":"1677_CR7","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1006\/jcss.1997.1504","volume":"55","author":"Y Freund","year":"1997","unstructured":"Freund, Y., & Schapire, R. E. (1997). A decision-theoretic generalization of on-line learning and an application to boosting. Journal of Computer and System Sciences, 55(1), 119\u2013139.","journal-title":"Journal of Computer and System Sciences"},{"key":"1677_CR8","doi-asserted-by":"crossref","unstructured":"Girshick, R. (2015). Fast r-cnn. In Proceedings of the IEEE international conference on computer vision (pp. 1440\u20131448).","DOI":"10.1109\/ICCV.2015.169"},{"key":"1677_CR9","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., & Malik, J. (2014). Rich feature hierarchies for accurate object detection and semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 580\u2013587).","DOI":"10.1109\/CVPR.2014.81"},{"key":"1677_CR10","unstructured":"Hassibi, B., & Stork, D.G. (1993). Second order derivatives for network pruning: Optimal brain surgeon. In Advances in neural information processing systems (pp. 164\u2013171)."},{"key":"1677_CR11","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der Maaten, L., & Weinberger, K. Q. (2017). Densely connected convolutional networks. In 2017 IEEE conference on computer vision and pattern recognition (CVPR), (Vol. 1 pp. 2261\u20132269).","DOI":"10.1109\/CVPR.2017.243"},{"key":"1677_CR12","doi-asserted-by":"crossref","unstructured":"Jia, Y., Shelhamer, E., Donahue, J., Karayev, S., Long, J., Girshick, R., Guadarrama, S., & Darrell, T. (2014). Caffe: Convolutional architecture for fast feature embedding. arXiv:1408.5093.","DOI":"10.1145\/2647868.2654889"},{"key":"1677_CR13","doi-asserted-by":"crossref","unstructured":"Lahoud, J., & Ghanem, B (2017). 2d-driven 3d object detection in rgb-d images. In Proceedings of the IEEE international conference on computer vision (pp. 4622\u20134630).","DOI":"10.1109\/ICCV.2017.495"},{"key":"1677_CR14","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C. -Y., & Berg, A.C. (2016). Ssd: Single shot multibox detector. In European conference on computer vision (pp. 21\u201337). Springer.","DOI":"10.1007\/978-3-319-46448-0_2"},{"issue":"2","key":"1677_CR15","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D. G. (2004). Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision, 60(2), 91\u2013110.","journal-title":"International Journal of Computer Vision"},{"key":"1677_CR16","unstructured":"Luo, Q., Ma, H., Wang, Y., Tang, L., & Xiong, R. (2017). 3d-ssd: Learning hierarchical features from rgb-d images for amodal 3d object detection. arXiv:1711.00238."},{"key":"1677_CR17","doi-asserted-by":"crossref","unstructured":"Nathan Silberman, P. K., Hoiem, Derek, & Fergus, R. (2012). Indoor segmentation and support inference from rgbd images. In ECCV.","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"1677_CR18","doi-asserted-by":"crossref","unstructured":"Peng, C., Zhang, X., Yu, G., Luo, G., & Sun, J. (2017). Large kernel matters\u2014improve semantic segmentation by global convolutional network. In 2017 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 1743\u20131751). IEEE.","DOI":"10.1109\/CVPR.2017.189"},{"key":"1677_CR19","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1016\/j.ins.2018.09.040","volume":"476","author":"MM Rahman","year":"2019","unstructured":"Rahman, M. M., Tan, Y., Xue, J., Shao, L., & 3d object detection, K. L. u. (2019). Learning 3d bounding boxes from scaled down 2d bounding boxes in rgb-d images. Information Sciences, 476, 147\u2013158.","journal-title":"Information Sciences"},{"key":"1677_CR20","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., & Farhadi, A. (2016). You only look once: Unified, real-time object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 779\u2013788).","DOI":"10.1109\/CVPR.2016.91"},{"key":"1677_CR21","doi-asserted-by":"crossref","unstructured":"Redmon, J., & Farhadi, A. (2017). Yolo9000: better, faster, stronger. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 7263\u20137271).","DOI":"10.1109\/CVPR.2017.690"},{"key":"1677_CR22","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J. (2015). r-cnn: Faster towards real-time object detection with region proposal networks. In Advances in neural information processing systems (pp. 91\u201399)."},{"key":"1677_CR23","doi-asserted-by":"crossref","unstructured":"Ren, Z., & Sudderth, E. B. (2016). Three-dimensional object detection and layout prediction using clouds of oriented gradients. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 1525\u20131533).","DOI":"10.1109\/CVPR.2016.169"},{"issue":"2","key":"1677_CR24","doi-asserted-by":"publisher","first-page":"1179","DOI":"10.1109\/LRA.2016.2532924","volume":"1","author":"C Rennie","year":"2016","unstructured":"Rennie, C., Shome, R., Bekris, K. E., & De Souza, A. F. (2016). A dataset for improved rgbd-based object detection and pose estimation for warehouse pick-and-place. IEEE Robotics and Automation Letters, 1 (2), 1179\u20131185.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"1677_CR25","doi-asserted-by":"crossref","unstructured":"Shih, K., Chiu, C., Lin, J., & Bu, Y. (2019). Real-time object detection with reduced region proposal network via multi-feature concatenation. IEEE transactions on neural networks and learning systems.","DOI":"10.1109\/TNNLS.2019.2929059"},{"key":"1677_CR26","unstructured":"Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556."},{"key":"1677_CR27","doi-asserted-by":"crossref","unstructured":"Song, S., Lichtenberg, S. P., & Sun, J.X. (2015). rgb-d: A rgb-d scene understanding benchmark suite. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298655"},{"key":"1677_CR28","doi-asserted-by":"crossref","unstructured":"Song, S., & Xiao, J. (2016). Deep sliding shapes for amodal 3d object detection in rgb-d images. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 808\u2013816).","DOI":"10.1109\/CVPR.2016.94"},{"issue":"4","key":"1677_CR29","doi-asserted-by":"publisher","first-page":"444","DOI":"10.1109\/70.88059","volume":"5","author":"T Tsujimura","year":"1989","unstructured":"Tsujimura, T., & Yabuta, T. (1989). Object detection by tactile sensing method employing force\/torque information. IEEE Transactions on robotics and Automation, 5(4), 444\u2013450.","journal-title":"IEEE Transactions on robotics and Automation"},{"issue":"2","key":"1677_CR30","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JR Uijlings","year":"2013","unstructured":"Uijlings, J. R., Van De Sande, K. E., Gevers, T., & Smeulders, A. W. (2013). Selective search for object recognition. International Journal of Computer Vision, 104(2), 154\u2013171.","journal-title":"International Journal of Computer Vision"},{"key":"1677_CR31","doi-asserted-by":"crossref","unstructured":"Wang, C.-C., & Thorpe, C. (2002). Simultaneous localization and mapping with detection and tracking of moving objects. In Proceedings 2002 IEEE International conference on robotics and automation (Cat. No. 02CH37292), (Vol. 3 pp. 2918\u20132924). IEEE.","DOI":"10.1109\/ROBOT.2002.1013675"},{"key":"1677_CR32","doi-asserted-by":"crossref","unstructured":"Wang, P., Chen, P., Yuan, Y., Liu, D., Huang, Z., Hou, X., & Cottrell, G. (2017). Understanding convolution for semantic segmentation. arXiv:1702.08502.","DOI":"10.1109\/WACV.2018.00163"},{"key":"1677_CR33","unstructured":"Wen, W., Wu, C., Wang, Y., Chen, Y., & Li, H. (2016). Learning structured sparsity in deep neural networks. In Advances in neural information processing systems (pp. 2074\u20132082)."},{"key":"1677_CR34","doi-asserted-by":"crossref","unstructured":"Xu, D., Anguelov, D., & Jain, A. (2018). Pointfusion: Deep sensor fusion for 3d bounding box estimation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 244\u2013253).","DOI":"10.1109\/CVPR.2018.00033"},{"key":"1677_CR35","doi-asserted-by":"crossref","unstructured":"Yoon, Y., DeSouza, G.N., & Kak, A.C. (2003). Real-time tracking and pose estimation for industrial objects using geometric features. In 2003 IEEE International conference on robotics and automation (cat. no. 03CH37422), (Vol. 3 pp. 3473\u20133478). IEEE.","DOI":"10.1109\/ROBOT.2003.1242127"},{"key":"1677_CR36","unstructured":"Yu, F., & Koltun, V. (2015). Multi-scale context aggregation by dilated convolutions. arXiv:1511.07122."},{"key":"1677_CR37","doi-asserted-by":"crossref","unstructured":"Zhiqiang, W., & Jun, L. (2017). A review of object detection based on convolutional neural network. In 2017 36th Chinese control conference (CCC) (pp. 11104\u201311109). IEEE.","DOI":"10.23919\/ChiCC.2017.8029130"},{"key":"1677_CR38","doi-asserted-by":"crossref","unstructured":"Zitnick, C. L., & Doll\u00e1r, P. (2014). Edge boxes: Locating object proposals from edges. In European conference on computer vision (pp. 391\u2013405). Springer.","DOI":"10.1007\/978-3-319-10602-1_26"}],"container-title":["Journal of Signal Processing Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-021-01677-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11265-021-01677-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-021-01677-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T20:33:26Z","timestamp":1634675606000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11265-021-01677-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,2]]},"references-count":38,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2021,10]]}},"alternative-id":["1677"],"URL":"https:\/\/doi.org\/10.1007\/s11265-021-01677-9","relation":{},"ISSN":["1939-8018","1939-8115"],"issn-type":[{"value":"1939-8018","type":"print"},{"value":"1939-8115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,8,2]]},"assertion":[{"value":"22 June 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 May 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 June 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 August 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}