{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T12:32:12Z","timestamp":1771849932113,"version":"3.50.1"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030110147","type":"print"},{"value":"9783030110154","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-11015-4_53","type":"book-chapter","created":{"date-parts":[[2019,1,24]],"date-time":"2019-01-24T06:42:47Z","timestamp":1548312167000},"page":"702-715","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["End-to-End 6-DoF Object Pose Estimation Through Differentiable Rasterization"],"prefix":"10.1007","author":[{"given":"Andrea","family":"Palazzi","sequence":"first","affiliation":[]},{"given":"Luca","family":"Bergamini","sequence":"additional","affiliation":[]},{"given":"Simone","family":"Calderara","sequence":"additional","affiliation":[]},{"given":"Rita","family":"Cucchiara","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,1,23]]},"reference":[{"issue":"10","key":"53_CR1","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1145\/2001269.2001293","volume":"54","author":"S Agarwal","year":"2011","unstructured":"Agarwal, S., et al.: Building Rome in a day. Commun. ACM 54(10), 105\u2013112 (2011)","journal-title":"Commun. ACM"},{"key":"53_CR2","doi-asserted-by":"crossref","unstructured":"Aubry, M., Maturana, D., Efros, A.A., Russell, B.C., Sivic, J.: Seeing 3D chairs: exemplar part-based 2D\u20133D alignment using a large dataset of CAD models. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3762\u20133769 (2014)","DOI":"10.1109\/CVPR.2014.487"},{"key":"53_CR3","unstructured":"Blender Online Community: Blender - a 3D modelling and rendering package. Blender Foundation, Blender Institute, Amsterdam (2017). http:\/\/www.blender.org"},{"key":"53_CR4","unstructured":"Boyer, E., Franco, J.S.: A hybrid approach for computing visual hulls of complex objects. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 695\u2013701. IEEE Computer Society Press (2003)"},{"key":"53_CR5","unstructured":"Chang, A.X., et al.: ShapeNet: an information-rich 3D model repository. arXiv preprint arXiv:1512.03012 (2015)"},{"issue":"4","key":"53_CR6","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2018","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"53_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"628","DOI":"10.1007\/978-3-319-46484-8_38","volume-title":"Computer Vision \u2013 ECCV 2016","author":"CB Choy","year":"2016","unstructured":"Choy, C.B., Xu, D., Gwak, J.Y., Chen, K., Savarese, S.: 3D-R2N2: a unified approach for single and multi-view 3D object reconstruction. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 628\u2013644. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_38"},{"key":"53_CR8","doi-asserted-by":"crossref","unstructured":"Collet, A., Berenson, D., Srinivasa, S.S., Ferguson, D.: Object recognition and full pose registration from a single image for robotic manipulation. In: IEEE International Conference on Robotics and Automation, ICRA 2009, pp. 48\u201355. IEEE (2009)","DOI":"10.1109\/ROBOT.2009.5152739"},{"issue":"10","key":"53_CR9","doi-asserted-by":"publisher","first-page":"1284","DOI":"10.1177\/0278364911401765","volume":"30","author":"A Collet","year":"2011","unstructured":"Collet, A., Martinez, M., Srinivasa, S.S.: The moped framework: object recognition and pose estimation for manipulation. Int. J. Robot. Res. 30(10), 1284\u20131306 (2011)","journal-title":"Int. J. Robot. Res."},{"key":"53_CR10","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2009, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"4","key":"53_CR11","first-page":"692","volume":"39","author":"A Dosovitskiy","year":"2017","unstructured":"Dosovitskiy, A., Springenberg, J.T., Tatarchenko, M., Brox, T.: Learning to generate chairs, tables and cars with convolutional networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(4), 692\u2013705 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"53_CR12","doi-asserted-by":"crossref","unstructured":"Du, X., Ang Jr., M.H., Karaman, S., Rus, D.: A general pipeline for 3D detection of vehicles. In: ICRA (2018)","DOI":"10.1109\/ICRA.2018.8461232"},{"key":"53_CR13","unstructured":"Fitzgibbon, A., Zisserman, A.: Automatic 3D model acquisition and generation of new images from video sequences. In: 9th European Signal Processing Conference (EUSIPCO 1998), pp. 1\u20138. IEEE (1998)"},{"key":"53_CR14","doi-asserted-by":"crossref","unstructured":"Gadelha, M., Maji, S., Wang, R.: 3D shape induction from 2D views of multiple objects. 3D Vision (2017)","DOI":"10.1109\/3DV.2017.00053"},{"key":"53_CR15","doi-asserted-by":"crossref","unstructured":"Gortler, S.J., Grzeszczuk, R., Szeliski, R., Cohen, M.F.: The lumigraph. In: Proceedings of the 23rd Annual Conference on Computer Graphics and Interactive Techniques, pp. 43\u201354. ACM (1996)","DOI":"10.1145\/237170.237200"},{"key":"53_CR16","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"2","key":"53_CR17","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1007\/s10851-009-0161-2","volume":"35","author":"DQ Huynh","year":"2009","unstructured":"Huynh, D.Q.: Metrics for 3D rotations: comparison and analysis. J. Math. Imaging Vis. 35(2), 155\u2013164 (2009)","journal-title":"J. Math. Imaging Vis."},{"key":"53_CR18","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et al.: Spatial transformer networks. In: Advances in Neural Information Processing Systems, pp. 2017\u20132025 (2015)"},{"key":"53_CR19","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"issue":"1","key":"53_CR20","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1007\/s11263-009-0233-1","volume":"84","author":"K Kolev","year":"2009","unstructured":"Kolev, K., Klodt, M., Brox, T., Cremers, D.: Continuous global optimization in multiview 3D reconstruction. Int. J. Comput. Vis. 84(1), 80\u201396 (2009)","journal-title":"Int. J. Comput. Vis."},{"issue":"2","key":"53_CR21","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1007\/s11263-008-0152-6","volume":"81","author":"V Lepetit","year":"2009","unstructured":"Lepetit, V., Moreno-Noguer, F., Fua, P.: EPnP: an accurate O(n) solution to the PnP problem. Int. J. Comput. Vis. 81(2), 155 (2009)","journal-title":"Int. J. Comput. Vis."},{"key":"53_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"478","DOI":"10.1007\/978-3-319-10599-4_31","volume-title":"Computer Vision \u2013 ECCV 2014","author":"JJ Lim","year":"2014","unstructured":"Lim, J.J., Khosla, A., Torralba, A.: FPM: fine pose parts-based model with 3D CAD models. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 478\u2013493. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_31"},{"key":"53_CR23","unstructured":"Long, J.L., Zhang, N., Darrell, T.: Do convnets learn correspondence? In: Advances in Neural Information Processing Systems, pp. 1601\u20131609 (2014)"},{"key":"53_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/978-3-319-10584-0_11","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MM Loper","year":"2014","unstructured":"Loper, M.M., Black, M.J.: OpenDR: an approximate differentiable renderer. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8695, pp. 154\u2013169. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10584-0_11"},{"issue":"2","key":"53_CR25","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D.G.: Distinctive image features from scale-invariant keypoints. Int. J. Comput. Vis. 60(2), 91\u2013110 (2004)","journal-title":"Int. J. Comput. Vis."},{"key":"53_CR26","doi-asserted-by":"crossref","unstructured":"Moreno-Noguer, F., Lepetit, V., Fua, P.: Accurate non-iterative O(n) solution to the PnP problem. In: IEEE 11th international conference on Computer vision, ICCV 2007, pp. 1\u20138. IEEE (2007)","DOI":"10.1109\/ICCV.2007.4409116"},{"key":"53_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1007\/978-3-319-46484-8_29","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Newell","year":"2016","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 483\u2013499. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_29"},{"key":"53_CR28","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhou, X., Chan, A., Derpanis, K.G., Daniilidis, K.: 6-DoF object pose from semantic keypoints. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 2011\u20132018. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989233"},{"key":"53_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1007\/3-540-49437-5_10","volume-title":"3D Structure from Multiple Images of Large-Scale Environments","author":"M Pollefeys","year":"1998","unstructured":"Pollefeys, M., Koch, R., Vergauwen, M., Van Gool, L.: Metric 3D surface reconstruction from uncalibrated image sequences. In: Koch, R., Van Gool, L. (eds.) SMILE 1998. LNCS, vol. 1506, pp. 139\u2013154. Springer, Heidelberg (1998). https:\/\/doi.org\/10.1007\/3-540-49437-5_10"},{"key":"53_CR30","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"53_CR31","unstructured":"Rezende, D.J., Eslami, S.A., Mohamed, S., Battaglia, P., Jaderberg, M., Heess, N.: Unsupervised learning of 3D structure from images. In: Advances in Neural Information Processing Systems, pp. 4996\u20135004 (2016)"},{"key":"53_CR32","doi-asserted-by":"crossref","unstructured":"Saponaro, P., Sorensen, S., Rhein, S., Mahoney, A.R., Kambhamettu, C.: Reconstruction of textureless regions using structure from motion and image-based interpolation. In: 2014 IEEE International Conference on Image Processing (ICIP), pp. 1847\u20131851. IEEE (2014)","DOI":"10.1109\/ICIP.2014.7025370"},{"key":"53_CR33","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"53_CR34","doi-asserted-by":"crossref","unstructured":"Sinha, A., Unmesh, A., Huang, Q., Ramani, K.: SurfNet: generating 3D shape surfaces using deep residual networks. In: Proceedings of CVPR (2017)","DOI":"10.1109\/CVPR.2017.91"},{"issue":"2","key":"53_CR35","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1016\/j.cviu.2007.10.001","volume":"111","author":"J Starck","year":"2008","unstructured":"Starck, J., Hilton, A.: Model-based human shape reconstruction from multiple views. Comput. Vis. Image Underst. 111(2), 179\u2013194 (2008)","journal-title":"Comput. Vis. Image Underst."},{"key":"53_CR36","doi-asserted-by":"crossref","unstructured":"Stark, M., Goesele, M., Schiele, B.: Back to the future: learning shape models from 3D CAD data. In: BMVC, vol. 2, p. 5. Citeseer (2010)","DOI":"10.5244\/C.24.106"},{"key":"53_CR37","doi-asserted-by":"crossref","unstructured":"Su, H., Qi, C.R., Li, Y., Guibas, L.J.: Render for CNN: viewpoint estimation in images using CNNs trained with rendered 3D model views. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2686\u20132694 (2015)","DOI":"10.1109\/ICCV.2015.308"},{"key":"53_CR38","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"322","DOI":"10.1007\/978-3-319-46478-7_20","volume-title":"Computer Vision \u2013 ECCV 2016","author":"M Tatarchenko","year":"2016","unstructured":"Tatarchenko, M., Dosovitskiy, A., Brox, T.: Multi-view 3D models from single images with a convolutional network. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 322\u2013337. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_20"},{"key":"53_CR39","doi-asserted-by":"crossref","unstructured":"Toshev, A., Szegedy, C.: DeepPose: human pose estimation via deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1653\u20131660 (2014)","DOI":"10.1109\/CVPR.2014.214"},{"key":"53_CR40","doi-asserted-by":"crossref","unstructured":"Tulsiani, S., Malik, J.: Viewpoints and keypoints. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1510\u20131519 (2015)","DOI":"10.1109\/CVPR.2015.7298758"},{"key":"53_CR41","doi-asserted-by":"crossref","unstructured":"Tulsiani, S., Zhou, T., Efros, A.A., Malik, J.: Multi-view supervision for single-view reconstruction via differentiable ray consistency. In: CVPR, vol. 1, p. 3 (2017)","DOI":"10.1109\/CVPR.2017.30"},{"issue":"12","key":"53_CR42","doi-asserted-by":"publisher","first-page":"2241","DOI":"10.1109\/TPAMI.2007.70712","volume":"29","author":"G Vogiatzis","year":"2007","unstructured":"Vogiatzis, G., Esteban, C.H., Torr, P.H., Cipolla, R.: Multiview stereo via volumetric graph-cuts and occlusion robust photo-consistency. IEEE Trans. Pattern Anal. Mach. Intell. 29(12), 2241\u20132246 (2007)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"53_CR43","doi-asserted-by":"crossref","unstructured":"Wei, S.E., Ramakrishna, V., Kanade, T., Sheikh, Y.: Convolutional pose machines. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4724\u20134732 (2016)","DOI":"10.1109\/CVPR.2016.511"},{"key":"53_CR44","doi-asserted-by":"crossref","unstructured":"Wiles, O., Zisserman, A.: SilNet: single-and multi-view reconstruction by learning from silhouettes. In: British Machine Vision Conference (2017)","DOI":"10.5244\/C.31.99"},{"key":"53_CR45","first-page":"5","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8, 5\u201332 (1992)","journal-title":"Mach. Learn."},{"key":"53_CR46","unstructured":"Wu, J., Zhang, C., Xue, T., Freeman, B., Tenenbaum, J.: Learning a probabilistic latent space of object shapes via 3D generative-adversarial modeling. In: Advances in Neural Information Processing Systems, pp. 82\u201390 (2016)"},{"key":"53_CR47","unstructured":"Yan, X., Yang, J., Yumer, E., Guo, Y., Lee, H.: Perspective transformer nets: learning single-view 3D object reconstruction without 3D supervision. In: Advances in Neural Information Processing Systems, pp. 1696\u20131704 (2016)"},{"key":"53_CR48","unstructured":"Yang, J., Reed, S.E., Yang, M.H., Lee, H.: Weakly-supervised disentangling with recurrent transformations for 3D view synthesis. In: Advances in Neural Information Processing Systems, pp. 1099\u20131107 (2015)"},{"key":"53_CR49","doi-asserted-by":"crossref","unstructured":"Zhou, X., Zhu, M., Leonardos, S., Derpanis, K.G., Daniilidis, K.: Sparseness meets deepness: 3D human pose estimation from monocular video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4966\u20134975 (2016)","DOI":"10.1109\/CVPR.2016.537"},{"key":"53_CR50","doi-asserted-by":"crossref","unstructured":"Zhu, M., Zhou, X., Daniilidis, K.: Single image pop-up from discriminatively learned parts. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 927\u2013935 (2015)","DOI":"10.1109\/ICCV.2015.112"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-11015-4_53","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,22]],"date-time":"2023-01-22T01:37:48Z","timestamp":1674351468000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-11015-4_53"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030110147","9783030110154"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-11015-4_53","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"23 January 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}