{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T12:16:24Z","timestamp":1764936984388,"version":"3.37.3"},"reference-count":156,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2018,6,30]],"date-time":"2018-06-30T00:00:00Z","timestamp":1530316800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,6,30]],"date-time":"2018-06-30T00:00:00Z","timestamp":1530316800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"crossref","award":["N66001-17-2-4029"],"award-info":[{"award-number":["N66001-17-2-4029"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["N66001-15-C-4035"],"award-info":[{"award-number":["N66001-15-C-4035"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100007297","name":"Office of Naval Research Global","doi-asserted-by":"publisher","award":["N00014-16-1-2007"],"award-info":[{"award-number":["N00014-16-1-2007"]}],"id":[{"id":"10.13039\/100007297","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000005","name":"U.S. Department of Defense","doi-asserted-by":"crossref","award":["W81XWH-15-1-0147"],"award-info":[{"award-number":["W81XWH-15-1-0147"]}],"id":[{"id":"10.13039\/100000005","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2018,9]]},"DOI":"10.1007\/s11263-018-1103-5","type":"journal-article","created":{"date-parts":[[2018,6,30]],"date-time":"2018-06-30T05:16:24Z","timestamp":1530335784000},"page":"920-941","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":58,"title":["Configurable 3D Scene Synthesis and 2D Image Rendering with Per-pixel Ground Truth Using Stochastic Grammars"],"prefix":"10.1007","volume":"126","author":[{"given":"Chenfanfu","family":"Jiang","sequence":"first","affiliation":[]},{"given":"Siyuan","family":"Qi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7024-1545","authenticated-orcid":false,"given":"Yixin","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Siyuan","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Jenny","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Lap-Fai","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Demetri","family":"Terzopoulos","sequence":"additional","affiliation":[]},{"given":"Song-Chun","family":"Zhu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,6,30]]},"reference":[{"key":"1103_CR1","unstructured":"Aldous, D. J. (1985). Exchangeability and related topics. In \u00c9cole d\u2019\u00c9t\u00e9 de Probabilit\u00e9s de Saint-Flour XIII 1983 (pp. 1\u2013198). Berlin: Springer."},{"key":"1103_CR2","doi-asserted-by":"publisher","DOI":"10.1515\/9783110806984","volume-title":"Color vision: Perspectives from different disciplines","author":"WG Backhaus","year":"1998","unstructured":"Backhaus, W. G., Kliegl, R., & Werner, J. S. (1998). Color vision: Perspectives from different disciplines. Berlin: Walter de Gruyter."},{"key":"1103_CR3","doi-asserted-by":"crossref","unstructured":"Bansal, A., Russell, B., & Gupta, A. (2016). Marr revisited: 2D-3D alignment via surface normal prediction. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.642"},{"key":"1103_CR4","doi-asserted-by":"crossref","unstructured":"Bar-Aviv, E., & Rivlin, E. (2006). Functional 3D object classification using simulation of embodied agent. In British machine vision conference (BMVC).","DOI":"10.5244\/C.20.32"},{"issue":"8","key":"1103_CR5","doi-asserted-by":"publisher","first-page":"1670","DOI":"10.1109\/TPAMI.2014.2377712","volume":"37","author":"JT Barron","year":"2015","unstructured":"Barron, J. T., & Malik, J. (2015). Shape, illumination, and reflectance from shading. Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 37(8), 1670\u201387.","journal-title":"Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1103_CR6","doi-asserted-by":"crossref","unstructured":"Bartell, F., Dereniak, E., & Wolfe, W. (1981). The theory and measurement of bidirectional reflectance distribution function (brdf) and bidirectional transmittance distribution function (btdf). In Radiation scattering in optical systems (Vol. 257, pp. 154\u2013161). International Society for Optics and Photonics.","DOI":"10.1117\/12.959611"},{"issue":"4","key":"1103_CR7","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1145\/2601097.2601206","volume":"33","author":"S Bell","year":"2014","unstructured":"Bell, S., Bala, K., & Snavely, N. (2014). Intrinsic images in the wild. ACM Transactions on Graphics (TOG), 33(4), 98.","journal-title":"ACM Transactions on Graphics (TOG)"},{"issue":"4","key":"1103_CR8","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1145\/2461912.2462002","volume":"32","author":"S Bell","year":"2013","unstructured":"Bell, S., Upchurch, P., Snavely, N., & Bala, K. (2013). Opensurfaces: A richly annotated catalog of surface appearance. ACM Transactions on Graphics (TOG), 32(4), 111.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"1103_CR9","doi-asserted-by":"crossref","unstructured":"Bell, S., Upchurch, P., Snavely, N., & Bala, K. (2015). Material recognition in the wild with the materials in context database. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298970"},{"key":"1103_CR10","doi-asserted-by":"crossref","unstructured":"Ben-David, S., Blitzer, J., Crammer, K., & Pereira, F. (2007). Analysis of representations for domain adaptation. In Advances in neural information processing systems (NIPS).","DOI":"10.7551\/mitpress\/7503.003.0022"},{"key":"1103_CR11","first-page":"2137","volume":"10","author":"S Bickel","year":"2009","unstructured":"Bickel, S., Br\u00fcckner, M., & Scheffer, T. (2009). Discriminative learning under covariate shift. Journal of Machine Learning Research, 10, 2137\u20132155.","journal-title":"Journal of Machine Learning Research"},{"key":"1103_CR12","doi-asserted-by":"crossref","unstructured":"Blitzer, J., McDonald, R., & Pereira, F. (2006). Domain adaptation with structural correspondence learning. In Empirical methods in natural language processing (EMNLP).","DOI":"10.3115\/1610075.1610094"},{"key":"1103_CR13","first-page":"33","volume":"10","author":"MA Carreira-Perpinan","year":"2005","unstructured":"Carreira-Perpinan, M. A., & Hinton, G. E. (2005). On contrastive divergence learning. AI Stats, 10, 33\u201340.","journal-title":"AI Stats"},{"key":"1103_CR14","unstructured":"Chang, A. X., Funkhouser, T., Guibas, L., Hanrahan, P., Huang, Q., Li, Z., Savarese, S., Savva, M., Song, S., Su, H., Xiao, J., Yi, L., & Yu, F. (2015). ShapeNet: An information-rich 3D model repository. arXiv preprint arXiv:1512.03012 ."},{"key":"1103_CR15","unstructured":"Chang, A. X., Funkhouser, T., Guibas, L., Hanrahan, P., Huang, Q., Li, Z., Savarese, S., Savva, M., Song, S., & Su, H., et\u00a0al. (2015). Shapenet: An information-rich 3D model repository. arXiv preprint arXiv:1512.03012 ."},{"key":"1103_CR16","unstructured":"Chapelle, O., & Harchaoui, Z. (2005). A machine learning approach to conjoint analysis. In Advances in neural information processing systems (NIPS)."},{"key":"1103_CR17","unstructured":"Chen, L. C., Papandreou, G., Kokkinos, I., Murphy, K., & Yuille, A. L. (2016). Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. arXiv preprint arXiv:1606.00915 ."},{"key":"1103_CR18","doi-asserted-by":"crossref","unstructured":"Chen, W., Wang, H., Li, Y., Su, H., Lischinsk, D., Cohen-Or, D., & Chen, B., et\u00a0al. (2016). Synthesizing training images for boosting human 3D pose estimation. In International conference on 3D vision (3DV).","DOI":"10.1109\/3DV.2016.58"},{"issue":"2","key":"1103_CR19","doi-asserted-by":"publisher","first-page":"204","DOI":"10.1007\/s11263-014-0779-4","volume":"112","author":"W Choi","year":"2015","unstructured":"Choi, W., Chao, Y. W., Pantofaru, C., & Savarese, S. (2015). Indoor scene understanding with geometric and semantic contexts. International Journal of Computer Vision (IJCV), 112(2), 204\u2013220.","journal-title":"International Journal of Computer Vision (IJCV)"},{"key":"1103_CR20","doi-asserted-by":"crossref","unstructured":"Cortes, C., Mohri, M., Riley, M., & Rostamizadeh, A. (2008). Sample selection bias correction theory. In International conference on algorithmic learning theory.","DOI":"10.1007\/978-3-540-87987-9_8"},{"key":"1103_CR21","doi-asserted-by":"crossref","unstructured":"Csurka, G. (2017). Domain adaptation for visual applications: A comprehensive survey. arXiv preprint arXiv:1702.05374 .","DOI":"10.1007\/978-3-319-58347-1"},{"key":"1103_CR22","unstructured":"Daum\u00e9\u00a0III, H. (2007). Frustratingly easy domain adaptation. In Annual meeting of the association for computational linguistics (ACL)."},{"key":"1103_CR23","unstructured":"Daum\u00e9\u00a0III, H. (2009). Bayesian multitask learning with latent hierarchies. In Conference on uncertainty in artificial intelligence (UAI)."},{"key":"1103_CR24","doi-asserted-by":"crossref","unstructured":"Del\u00a0Pero, L., Bowdish, J., Fried, D., Kermgard, B., Hartley, E., & Barnard, K. (2012). Bayesian geometric modeling of indoor scenes. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2012.6247994"},{"key":"1103_CR25","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L. J., Li, K., & Fei-Fei, L. (2009). Imagenet: A large-scale hierarchical image database. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1103_CR26","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., Fischer, P., Ilg, E., Hausser, P., Hazirbas, C., Golkov, V., van\u00a0der Smagt, P., Cremers, D., & Brox, T. (2015). Flownet: Learning optical flow with convolutional networks. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/ICCV.2015.316"},{"key":"1103_CR27","doi-asserted-by":"crossref","unstructured":"Du, Y., Wong, Y., Liu, Y., Han, F., Gui, Y., Wang, Z., Kankanhalli, M., & Geng, W. (2016). Marker-less 3d human motion capture with monocular image sequence and height-maps. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-46493-0_2"},{"key":"1103_CR28","doi-asserted-by":"crossref","unstructured":"Eigen, D., & Fergus, R. (2015). Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.304"},{"key":"1103_CR29","unstructured":"Eigen, D., Puhrsch, C., & Fergus, R. (2014). Depth map prediction from a single image using a multi-scale deep network. In Advances in neural information processing systems (NIPS)."},{"issue":"1","key":"1103_CR30","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S. A., Van Gool, L., Williams, C. K., Winn, J., & Zisserman, A. (2015). The pascal visual object classes challenge: A retrospective. International Journal of Computer Vision (IJCV), 111(1), 98\u2013136.","journal-title":"International Journal of Computer Vision (IJCV)"},{"key":"1103_CR31","doi-asserted-by":"crossref","unstructured":"Evgeniou, T., & Pontil, M. (2004). Regularized multi\u2013task learning. In International conference on knowledge discovery and data mining (SIGKDD).","DOI":"10.1145\/1014052.1014067"},{"issue":"4","key":"1103_CR32","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1145\/2601097.2601223","volume":"33","author":"SR Fanello","year":"2014","unstructured":"Fanello, S. R., Keskin, C., Izadi, S., Kohli, P., Kim, D., Sweeney, D., et al. (2014). Learning to be a depth camera for close-range human capture and interaction. ACM Transactions on Graphics (TOG), 33(4), 86.","journal-title":"ACM Transactions on Graphics (TOG)"},{"issue":"6","key":"1103_CR33","doi-asserted-by":"publisher","first-page":"208-1","DOI":"10.1145\/2366145.2366154","volume":"31","author":"M Fisher","year":"2012","unstructured":"Fisher, M., Ritchie, D., Savva, M., Funkhouser, T., & Hanrahan, P. (2012). Example-based synthesis of 3D object arrangements. ACM Transactions on Graphics (TOG), 31(6), 208-1\u2013208-12.","journal-title":"ACM Transactions on Graphics (TOG)"},{"issue":"4","key":"1103_CR34","doi-asserted-by":"publisher","first-page":"107-1","DOI":"10.1145\/2010324.1964929","volume":"30","author":"M Fisher","year":"2011","unstructured":"Fisher, M., Savva, M., & Hanrahan, P. (2011). Characterizing structural relationships in scenes using graph kernels. ACM Transactions on Graphics (TOG), 30(4), 107-1\u2013107-12.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"1103_CR35","doi-asserted-by":"crossref","unstructured":"Fouhey, D. F., Gupta, A., & Hebert, M. (2013). Data-driven 3d primitives for single image understanding. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2013.421"},{"issue":"14","key":"1103_CR36","doi-asserted-by":"publisher","first-page":"8093","DOI":"10.1073\/pnas.0731829100","volume":"100","author":"A Fridman","year":"2003","unstructured":"Fridman, A. (2003). Mixed markov models. Proceedings of the National Academy of Sciences (PNAS), 100(14), 8093.","journal-title":"Proceedings of the National Academy of Sciences (PNAS)"},{"key":"1103_CR37","unstructured":"Gaidon, A., Wang, Q., Cabon, Y., & Vig, E. (2016). Virtual worlds as proxy for multi-object tracking analysis. In Conference on computer vision and pattern recognition (CVPR)."},{"key":"1103_CR38","unstructured":"Ganin, Y., & Lempitsky, V. (2015). Unsupervised domain adaptation by backpropagation. In International conference on machine learning (ICML)."},{"key":"1103_CR39","doi-asserted-by":"crossref","unstructured":"Ghezelghieh, M. F., Kasturi, R., & Sarkar, S. (2016). Learning camera viewpoint using cnn to improve 3D body pose estimation. In International conference on 3D vision (3DV).","DOI":"10.1109\/3DV.2016.75"},{"key":"1103_CR40","doi-asserted-by":"crossref","unstructured":"Grabner, H., Gall, J., & Van\u00a0Gool, L. (2011). What makes a chair a chair? In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2011.5995327"},{"key":"1103_CR41","unstructured":"Gregor, K., Danihelka, I., Graves, A., Rezende, D. J., & Wierstra, D. (2015) Draw: A recurrent neural network for image generation. arXiv preprint arXiv:1502.04623 ."},{"key":"1103_CR42","unstructured":"Gretton, A., Smola, A. J., Huang, J., Schmittfull, M., Borgwardt, K. M., & Sch\u00f6llkopf, B. (2009). Covariate shift by kernel mean matching. In Dataset shift in machine learning (pp. 131\u2013160). MIT Press."},{"key":"1103_CR43","unstructured":"Gupta, A., Hebert, M., Kanade, T., & Blei, D. M. (2010). Estimating spatial layout of rooms using volumetric reasoning about objects and surfaces. In Advances in neural information processing systems (NIPS)."},{"key":"1103_CR44","doi-asserted-by":"crossref","unstructured":"Gupta, A., Satkin, S., Efros, A. A., & Hebert, M. (2011). From 3D scene geometry to human workspace. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2011.5995448"},{"key":"1103_CR45","unstructured":"Handa, A., P\u0103tr\u0103ucean, V., Badrinarayanan, V., Stent, S., & Cipolla, R. (2016). Understanding real world indoor scenes with synthetic data. In Conference on computer vision and pattern recognition (CVPR)."},{"key":"1103_CR46","doi-asserted-by":"crossref","unstructured":"Handa, A., Patraucean, V., Stent, S., & Cipolla, R. (2016). Scenenet: an annotated model generator for indoor scene understanding. In International conference on robotics and automation (ICRA).","DOI":"10.1109\/ICRA.2016.7487797"},{"key":"1103_CR47","doi-asserted-by":"crossref","unstructured":"Handa, A., Whelan, T., McDonald, J., & Davison, A. J. (2014). A benchmark for rgb-d visual odometry, 3D reconstruction and slam. In International conference on robotics and automation (ICRA).","DOI":"10.1109\/ICRA.2014.6907054"},{"issue":"4","key":"1103_CR48","doi-asserted-by":"publisher","first-page":"493","DOI":"10.1109\/TPAMI.2005.82","volume":"27","author":"K Hara","year":"2005","unstructured":"Hara, K., Nishino, K., et al. (2005). Light source position and reflectance estimation from a single view without the distant illumination assumption. Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 27(4), 493\u2013505.","journal-title":"Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1103_CR49","doi-asserted-by":"crossref","unstructured":"Hattori, H., Naresh\u00a0Boddeti, V., Kitani, K. M., & Kanade, T. (2015). Learning scene-specific pedestrian detectors without real data. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7299006"},{"key":"1103_CR50","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2015). Delving deep into rectifiers: Surpassing human-level performance on imagenet classification. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.123"},{"key":"1103_CR51","doi-asserted-by":"crossref","unstructured":"Heckman, J. J. (1977). Sample selection bias as a specification error (with an application to the estimation of labor supply functions). Massachusetts: National Bureau of Economic Research Cambridge","DOI":"10.3386\/w0172"},{"key":"1103_CR52","doi-asserted-by":"crossref","unstructured":"Hedau, V., Hoiem, D., & Forsyth, D. (2009). Recovering the spatial layout of cluttered rooms. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2009.5459411"},{"key":"1103_CR53","unstructured":"Heess, N., Sriram, S., Lemmon, J., Merel, J., Wayne, G., Tassa, Y., Erez, T., Wang, Z., Eslami, A., & Riedmiller, M., et\u00a0al. (2017). Emergence of locomotion behaviours in rich environments. arXiv preprint arXiv:1707.02286 ."},{"key":"1103_CR54","unstructured":"Hermans, T., Rehg, J. M., & Bobick, A. (2011). Affordance prediction via learned object attributes. In International conference on robotics and automation (ICRA)."},{"issue":"8","key":"1103_CR55","doi-asserted-by":"publisher","first-page":"1771","DOI":"10.1162\/089976602760128018","volume":"14","author":"GE Hinton","year":"2002","unstructured":"Hinton, G. E. (2002). Training products of experts by minimizing contrastive divergence. Neural Computation, 14(8), 1771\u20131800.","journal-title":"Neural Computation"},{"issue":"5786","key":"1103_CR56","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton, G. E., & Salakhutdinov, R. R. (2006). Reducing the dimensionality of data with neural networks. Science, 313(5786), 504\u2013507.","journal-title":"Science"},{"issue":"3","key":"1103_CR57","doi-asserted-by":"publisher","first-page":"577","DOI":"10.1145\/1073204.1073232","volume":"24","author":"D Hoiem","year":"2005","unstructured":"Hoiem, D., Efros, A. A., & Hebert, M. (2005). Automatic photo pop-up. ACM Transactions on Graphics (TOG), 24(3), 577\u2013584.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"1103_CR58","doi-asserted-by":"crossref","unstructured":"Huang, Q., Wang, H., & Koltun, V. (2015). Single-view reconstruction via joint analysis of image and shape collections. ACM Transactions on Graphics (TOG). https:\/\/doi.org\/10.1145\/2766890 .","DOI":"10.1145\/2766890"},{"key":"1103_CR59","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Koppula, H., & Saxena, A. (2013). Hallucinated humans as the hidden context for labeling 3D scenes. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2013.385"},{"key":"1103_CR60","unstructured":"Kohli, Y. Z. M. B. P., Izadi, S., & Xiao, J. (2016). Deepcontext: Context-encoding neural pathways for 3D holistic scene understanding. arXiv preprint arXiv:1603.04922 ."},{"key":"1103_CR61","doi-asserted-by":"crossref","unstructured":"Koppula, H. S., & Saxena, A. (2014). Physically grounded spatio-temporal object affordances. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-10578-9_54"},{"issue":"1","key":"1103_CR62","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/TPAMI.2015.2430335","volume":"38","author":"HS Koppula","year":"2016","unstructured":"Koppula, H. S., & Saxena, A. (2016). Anticipating human activities using object affordances for reactive robotic response. Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 38(1), 14\u201329.","journal-title":"Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1103_CR63","doi-asserted-by":"crossref","unstructured":"Kratz, L., & Nishino, K. (2009). Factorizing scene albedo and depth from a single foggy image. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2009.5459382"},{"key":"1103_CR64","doi-asserted-by":"crossref","unstructured":"Kulkarni, T. D., Kohli, P., Tenenbaum, J. B., & Mansinghka, V. (2015). Picture: A probabilistic programming language for scene perception. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7299068"},{"key":"1103_CR65","unstructured":"Kulkarni, T. D., Whitney, W. F., Kohli, P., & Tenenbaum, J. (2015). Deep convolutional inverse graphics network. In Advances in neural information processing systems (NIPS)."},{"key":"1103_CR66","unstructured":"Laina, I., Rupprecht, C., Belagiannis, V., Tombari, F., & Navab, N. (2016). Deeper depth prediction with fully convolutional residual networks. arXiv preprint arXiv:1606.00373 ."},{"key":"1103_CR67","doi-asserted-by":"crossref","unstructured":"Lee, D. C., Hebert, M., & Kanade, T. (2009). Geometric reasoning for single image structure recovery. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2009.5206872"},{"key":"1103_CR68","unstructured":"Liang, W., Zhao, Y., Zhu, Y., & Zhu, S.C. (2016). What is where: Inferring containment relations from videos. In International joint conference on artificial intelligence (IJCAI)."},{"key":"1103_CR69","unstructured":"Lin, J., Guo, X., Shao, J., Jiang, C., Zhu, Y., & Zhu, S. C. (2016). A virtual reality platform for dynamic human-scene interaction. In SIGGRAPH ASIA 2016 virtual reality meets physical reality: Modelling and simulating virtual humans and environments (pp. 11). ACM."},{"key":"1103_CR70","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, C. L. (2014). Microsoft coco: Common objects in context. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1103_CR71","doi-asserted-by":"crossref","unstructured":"Liu, F., Shen, C., & Lin, G. (2015). Deep convolutional neural fields for depth estimation from a single image. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1007\/978-1-4471-6741-9"},{"key":"1103_CR72","doi-asserted-by":"crossref","unstructured":"Liu, X., Zhao, Y., & Zhu, S. C. (2014). Single-view 3d scene parsing by attributed grammar. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1007\/978-1-4471-6515-6"},{"issue":"1","key":"1103_CR73","first-page":"2321","volume":"38","author":"S Lombardi","year":"2016","unstructured":"Lombardi, S., & Nishino, K. (2016). Reflectance and illumination recovery in the wild. Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 38(1), 2321\u20132334.","journal-title":"Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1103_CR74","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., & Darrell, T. (2015). Fully convolutional networks for semantic segmentation. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1103_CR75","doi-asserted-by":"crossref","unstructured":"Loper, M. M., & Black, M. J. (2014). Opendr: An approximate differentiable renderer. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-10584-0_11"},{"key":"1103_CR76","doi-asserted-by":"crossref","unstructured":"L\u00f3pez, A. M., Xu, J., G\u00f3mez, J. L., V\u00e1zquez, D., & Ros, G. (2017). From virtual to real world visual perception using domain adaptation the dpm as example. In Domain adaptation in computer vision applications (pp. 243\u2013258). Springer.","DOI":"10.1007\/978-3-319-58347-1_13"},{"key":"1103_CR77","doi-asserted-by":"crossref","unstructured":"Lu, Y., Zhu, S. C., & Wu, Y. N. (2016). Learning frame models using cnn filters. In AAAI Conference on artificial intelligence (AAAI).","DOI":"10.1609\/aaai.v30i1.10238"},{"key":"1103_CR78","doi-asserted-by":"crossref","unstructured":"Mallya, A., & Lazebnik, S. (2015). Learning informative edge maps for indoor scene layout prediction. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.113"},{"key":"1103_CR79","unstructured":"Mansinghka, V., Kulkarni, T. D., Perov, Y. N., & Tenenbaum, J. (2013). Approximate bayesian image interpretation using generative probabilistic graphics programs. In Advances in neural information processing systems (NIPS)."},{"key":"1103_CR80","unstructured":"Mansour, Y., Mohri, M., & Rostamizadeh, A. (2009). Domain adaptation: Learning bounds and algorithms. In Annual conference on learning theory (COLT)."},{"key":"1103_CR81","doi-asserted-by":"crossref","unstructured":"Marin, J., V\u00e1zquez, D., Ger\u00f3nimo, D., & L\u00f3pez, A. M. (2010). Learning appearance in virtual scenarios for pedestrian detection. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2010.5540218"},{"key":"1103_CR82","doi-asserted-by":"crossref","unstructured":"Merrell, P., Schkufza, E., Li, Z., Agrawala, M., & Koltun, V. (2011). Interactive furniture layout using interior design guidelines. ACM Transactions on Graphics (TOG). https:\/\/doi.org\/10.1145\/2010324.1964982 .","DOI":"10.1145\/2010324.1964982"},{"key":"1103_CR83","doi-asserted-by":"crossref","unstructured":"Movshovitz-Attias, Y., Kanade, T., & Sheikh, Y. (2016). How useful is photo-realistic rendering for visual learning? In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-49409-8_18"},{"key":"1103_CR84","doi-asserted-by":"crossref","unstructured":"Movshovitz-Attias, Y., Sheikh, Y., Boddeti, V. N., & Wei, Z. (2014). 3D pose-by-detection of vehicles via discriminatively reduced ensembles of correlation filters. In British machine vision conference (BMVC).","DOI":"10.5244\/C.28.53"},{"key":"1103_CR85","unstructured":"Myers, A., Kanazawa, A., Fermuller, C., & Aloimonos, Y. (2014). Affordance of object parts from geometric features. In Workshop on Vision meets Cognition, CVPR."},{"key":"1103_CR86","doi-asserted-by":"crossref","unstructured":"Nishino, K., Zhang, Z., Ikeuchi, K. (2001). Determining reflectance parameters and illumination distribution from a sparse set of images for view-dependent image synthesis. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2001.937573"},{"key":"1103_CR87","doi-asserted-by":"crossref","unstructured":"Noh, H., Hong, S., & Han, B. (2015). Learning deconvolution network for semantic segmentation. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.178"},{"key":"1103_CR88","doi-asserted-by":"crossref","unstructured":"Oxholm, G., & Nishino, K. (2014). Multiview shape and reflectance from natural illumination. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2014.277"},{"issue":"2","key":"1103_CR89","first-page":"2321","volume":"38","author":"G Oxholm","year":"2016","unstructured":"Oxholm, G., & Nishino, K. (2016). Shape and reflectance estimation in the wild. Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 38(2), 2321\u20132334.","journal-title":"Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1103_CR90","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511803161","volume-title":"Causality","author":"J Pearl","year":"2009","unstructured":"Pearl, J. (2009). Causality. Cambridge: Cambridge University Press."},{"key":"1103_CR91","doi-asserted-by":"crossref","unstructured":"Peng, X., Sun, B., Ali, K., & Saenko, K. (2015). Learning deep object detectors from 3D models. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/ICCV.2015.151"},{"key":"1103_CR92","volume-title":"Physically based rendering: From theory to implementation","author":"M Pharr","year":"2004","unstructured":"Pharr, M., & Humphreys, G. (2004). Physically based rendering: From theory to implementation. San Francisco: Morgan Kaufmann."},{"key":"1103_CR93","doi-asserted-by":"crossref","unstructured":"Pishchulin, L., Jain, A., Andriluka, M., Thorm\u00e4hlen, T., & Schiele, B. (2012). Articulated people detection and pose estimation: Reshaping the future. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2012.6248052"},{"key":"1103_CR94","doi-asserted-by":"crossref","unstructured":"Pishchulin, L., Jain, A., Wojek, C., Andriluka, M., Thorm\u00e4hlen, T., & Schiele, B. (2011). Learning people detection models from few training samples. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2011.5995574"},{"key":"1103_CR95","doi-asserted-by":"crossref","unstructured":"Qi, C. R., Su, H., Niessner, M., Dai, A., Yan, M., & Guibas, L. J. (2016). Volumetric and multi-view cnns for object classification on 3D data. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.609"},{"key":"1103_CR96","unstructured":"Qiu, W. (2016). Generating human images and ground truth using computer graphics. Ph.D. thesis, University of California, Los Angeles."},{"key":"1103_CR97","unstructured":"Qiu, W., & Yuille, A. (2016). Unrealcv: Connecting computer vision to unreal engine. arXiv preprint arXiv:1609.01326 ."},{"issue":"10","key":"1103_CR98","doi-asserted-by":"publisher","first-page":"1640","DOI":"10.1109\/JPROC.2008.928932","volume":"96","author":"F Qureshi","year":"2008","unstructured":"Qureshi, F., & Terzopoulos, D. (2008). Smart camera networks in virtual reality. Proceedings of the IEEE, 96(10), 1640\u20131656.","journal-title":"Proceedings of the IEEE"},{"key":"1103_CR99","unstructured":"Radford, A., Metz, L., & Chintala, S. (2015). Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:1511.06434 ."},{"key":"1103_CR100","doi-asserted-by":"crossref","unstructured":"Rahmani, H., & Mian, A. (2015). Learning a non-linear knowledge transfer model for cross-view action recognition. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298860"},{"key":"1103_CR101","doi-asserted-by":"crossref","unstructured":"Rahmani, H., & Mian, A. (2016). 3D action recognition from novel viewpoints. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.167"},{"key":"1103_CR102","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards real-time object detection with region proposal networks. In Advances in neural information processing systems (NIPS)."},{"key":"1103_CR103","doi-asserted-by":"crossref","unstructured":"Richter, S. R., Vineet, V., Roth, S., & Koltun, V. (2016). Playing for data: Ground truth from computer games. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-46475-6_7"},{"key":"1103_CR104","unstructured":"Roberto\u00a0de Souza, C., Gaidon, A., Cabon, Y., & Manuel\u00a0Lopez, A. (2017). Procedural generation of videos to train deep action recognition networks. In Conference on computer vision and pattern recognition (CVPR)."},{"key":"1103_CR105","unstructured":"Rogez, G., & Schmid, C. (2016). Mocap-guided data augmentation for 3D pose estimation in the wild. In Advances in neural information processing systems (NIPS)."},{"key":"1103_CR106","doi-asserted-by":"crossref","unstructured":"Romero, J., Loper, M., & Black, M. J. (2015). Flowcap: 2D human pose from optical flow. In German conference on pattern recognition.","DOI":"10.1007\/978-3-319-24947-6_34"},{"key":"1103_CR107","doi-asserted-by":"crossref","unstructured":"Ros, G., Sellart, L., Materzynska, J., Vazquez, D., & Lopez, A.M. (2016). The synthia dataset: A large collection of synthetic images for semantic segmentation of urban scenes. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.352"},{"key":"1103_CR108","doi-asserted-by":"crossref","unstructured":"Roy, A., & Todorovic, S. (2016). A multi-scale cnn for affordance segmentation in rgb images. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-46493-0_12"},{"issue":"3","key":"1103_CR109","first-page":"1218","volume":"25","author":"I Sato","year":"2003","unstructured":"Sato, I., Sato, Y., & Ikeuchi, K. (2003). Illumination from shadows. Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 25(3), 1218\u20131227.","journal-title":"Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1103_CR110","doi-asserted-by":"crossref","unstructured":"Shakhnarovich, G., Viola, P., & Darrell, T. (2003). Fast pose estimation with parameter-sensitive hashing. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2003.1238424"},{"key":"1103_CR111","doi-asserted-by":"publisher","DOI":"10.1201\/9781420041484","volume-title":"Digital color imaging handbook","author":"G Sharma","year":"2002","unstructured":"Sharma, G., & Bala, R. (2002). Digital color imaging handbook. Boca Raton: CRC Press."},{"issue":"1","key":"1103_CR112","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1145\/2398356.2398381","volume":"56","author":"J Shotton","year":"2013","unstructured":"Shotton, J., Sharp, T., Kipman, A., Fitzgibbon, A., Finocchio, M., Blake, A., et al. (2013). Real-time human pose recognition in parts from single depth images. Communications of the ACM, 56(1), 116\u2013124.","journal-title":"Communications of the ACM"},{"key":"1103_CR113","doi-asserted-by":"crossref","unstructured":"Silberman, N., Hoiem, D., Kohli, P., & Fergus, R. (2012). Indoor segmentation and support inference from rgbd images. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"1103_CR114","doi-asserted-by":"crossref","unstructured":"Song, S., & Xiao, J. (2014). Sliding shapes for 3D object detection in depth images. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-10599-4_41"},{"key":"1103_CR115","unstructured":"Song, S., Yu, F., Zeng, A., Chang, A. X., Savva, M., & Funkhouser, T. (2014). Semantic scene completion from a single depth image. In Conference on computer vision and pattern recognition (CVPR)."},{"key":"1103_CR116","doi-asserted-by":"crossref","unstructured":"Stark, L., & Bowyer, K. (1991). Achieving generalized object recognition through reasoning about association of function to structure. Transactions on Pattern Analysis and Machine Intelligence (TPAMI),13(10), 1097\u20131104.","DOI":"10.1109\/34.99242"},{"key":"1103_CR117","doi-asserted-by":"crossref","unstructured":"Stark, M., Goesele, M., & Schiele, B. (2010). Back to the future: Learning shape models from 3D cad data. In British machine vision conference (BMVC).","DOI":"10.5244\/C.24.106"},{"issue":"4","key":"1103_CR118","first-page":"37","volume":"33","author":"H Su","year":"2014","unstructured":"Su, H., Huang, Q., Mitra, N. J., Li, Y., & Guibas, L. (2014). Estimating image depth using shape collections. ACM Transactions on Graphics (TOG), 33(4), 37.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"1103_CR119","doi-asserted-by":"crossref","unstructured":"Su, H., Qi, C. R., Li, Y., & Guibas, L. J. (2015). Render for cnn: Viewpoint estimation in images using cnns trained with rendered 3d model views. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.308"},{"key":"1103_CR120","doi-asserted-by":"crossref","unstructured":"Sun, B., & Saenko, K. (2014). From virtual to reality: Fast adaptation of virtual object detectors to real domains. In British machine vision conference (BMVC).","DOI":"10.5244\/C.28.82"},{"key":"1103_CR121","doi-asserted-by":"crossref","unstructured":"Sun, C., Shrivastava, A., Singh, S., & Gupta, A. (2017). Revisiting unreasonable effectiveness of data in deep learning era. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.97"},{"key":"1103_CR122","doi-asserted-by":"crossref","unstructured":"Terzopoulos, D., & Rabie, T. F. (1995). Animat vision: Active vision in artificial animals. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.1995.466856"},{"key":"1103_CR123","doi-asserted-by":"crossref","unstructured":"Torralba, A., & Efros, A.A. (2011). Unbiased look at dataset bias. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2011.5995347"},{"key":"1103_CR124","doi-asserted-by":"crossref","unstructured":"Tzeng, E., Hoffman, J., Darrell, T., & Saenko, K. (2015). Simultaneous deep transfer across domains and tasks. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.463"},{"key":"1103_CR125","volume-title":"Light vision color","author":"A Valberg","year":"2007","unstructured":"Valberg, A. (2007). Light vision color. New York: Wiley."},{"key":"1103_CR126","doi-asserted-by":"crossref","unstructured":"Varol, G., Romero, J., Martin, X., Mahmood, N., Black, M., Laptev, I., & Schmid, C. (2017). Learning from synthetic humans. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.492"},{"issue":"4","key":"1103_CR127","doi-asserted-by":"publisher","first-page":"797","DOI":"10.1109\/TPAMI.2013.163","volume":"36","author":"D V\u00e1zquez","year":"2014","unstructured":"V\u00e1zquez, D., Lopez, A. M., Marin, J., Ponsa, D., & Geronimo, D. (2014). Virtual and real world adaptation for pedestrian detection. Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 36(4), 797\u2013809.","journal-title":"Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1103_CR128","doi-asserted-by":"crossref","unstructured":"Wang, X., Fouhey, D., & Gupta, A. (2015). Designing deep networks for surface normal estimation. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298652"},{"key":"1103_CR129","unstructured":"Wang, X., & Gupta, A. (2016). Generative image modeling using style and structure adversarial networks. arXiv preprint arXiv:1603.05631 ."},{"key":"1103_CR130","unstructured":"Wang, Z., Merel, J. S., Reed, S. E., de\u00a0Freitas, N., Wayne, G., & Heess, N. (2017). Robust imitation of diverse behaviors. In Advances in neural information processing systems (NIPS)."},{"key":"1103_CR131","doi-asserted-by":"crossref","unstructured":"Weinberger, K., Dasgupta, A., Langford, J., Smola, A., & Attenberg, J. (2009). Feature hashing for large scale multitask learning. In International conference on machine learning (ICML).","DOI":"10.1145\/1553374.1553516"},{"key":"1103_CR132","unstructured":"Whelan, T., Leutenegger, S., Salas-Moreno, R. F., Glocker, B., & Davison, A. J. (2015). Elasticfusion: Dense slam without a pose graph. In Robotics: Science and systems (RSS)."},{"key":"1103_CR133","unstructured":"Wu, J. (2016). Computational perception of physical object properties. Ph.D. thesis, Massachusetts Institute of Technology."},{"key":"1103_CR134","unstructured":"Wu, J., Yildirim, I., Lim, J. J., Freeman, B., & Tenenbaum, J. (2015). Galileo: Perceiving physical object properties by integrating a physics engine with deep learning. In Advances in neural information processing systems (NIPS)."},{"key":"1103_CR135","unstructured":"Xiao, J., Russell, B., & Torralba, A. (2012). Localizing 3D cuboids in single-view images. In Advances in neural information processing systems (NIPS)."},{"key":"1103_CR136","unstructured":"Xie, J., Lu, Y., Zhu, S. C., & Wu, Y. N. (2016). Cooperative training of descriptor and generator networks. arXiv preprint arXiv:1609.09408 ."},{"key":"1103_CR137","unstructured":"Xie, J., Lu, Y., Zhu, S. C., & Wu, Y. N. (2016). A theory of generative convnet. In International conference on machine learning (ICML)."},{"key":"1103_CR138","first-page":"35","volume":"8","author":"Y Xue","year":"2007","unstructured":"Xue, Y., Liao, X., Carin, L., & Krishnapuram, B. (2007). Multi-task learning for classification with dirichlet process priors. Journal of Machine Learning Research, 8, 35\u201363.","journal-title":"Journal of Machine Learning Research"},{"key":"1103_CR139","doi-asserted-by":"crossref","unstructured":"Yasin, H., Iqbal, U., Kr\u00fcger, B., Weber, A., & Gall, J. (2016). A dual-source approach for 3d pose estimation from a single image. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.535"},{"key":"1103_CR140","unstructured":"Yeh, Y. T., Yang, L., Watson, M., Goodman, N. D.,&Hanrahan, P. (2012). Synthesizing open worlds with constraints using locally annealed reversible jump mcmc. ACM Transactions on Graphics (TOG), https:\/\/doi.org\/.10.1145\/2185520.2185552 ."},{"key":"1103_CR141","doi-asserted-by":"crossref","unstructured":"Yu, K., Tresp, V., & Schwaighofer, A. (2005). Learning Gaussian processes from multiple tasks. In International conference on machine learning (ICML).","DOI":"10.1145\/1102351.1102479"},{"key":"1103_CR142","doi-asserted-by":"crossref","unstructured":"Yu, L. F., Duncan, N., & Yeung, S. K. (2015). Fill and transfer: A simple physics-based approach for containability reasoning. In International conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.88"},{"issue":"4","key":"1103_CR143","doi-asserted-by":"publisher","first-page":"786","DOI":"10.1145\/2010324.1964981","volume":"30","author":"LF Yu","year":"2011","unstructured":"Yu, L. F., Yeung, S. K., Tang, C. K., Terzopoulos, D., Chan, T. F., & Osher, S. J. (2011). Make it home: Automatic optimization of furniture arrangement. ACM Transactions on Graphics (TOG), 30(4), 786\u2013797.","journal-title":"ACM Transactions on Graphics (TOG)"},{"issue":"2","key":"1103_CR144","doi-asserted-by":"publisher","first-page":"1138","DOI":"10.1109\/TVCG.2015.2417575","volume":"22","author":"LF Yu","year":"2016","unstructured":"Yu, L. F., Yeung, S. K., & Terzopoulos, D. (2016). The clutterpalette: An interactive tool for detailing indoor scenes. IEEE Transactions on Visualization & Computer Graph (TVCG), 22(2), 1138\u20131148.","journal-title":"IEEE Transactions on Visualization & Computer Graph (TVCG)"},{"key":"1103_CR145","doi-asserted-by":"crossref","unstructured":"Zhang, H., Dana, K., & Nishino, K. (2015). Reflectance hashing for material recognition. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298926"},{"key":"1103_CR146","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Song, S., Yumer, E., Savva, M., Lee, J. Y., Jin, H., & Funkhouser, T. (2017). Physically-based rendering for indoor scene understanding using convolutional neural networks. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.537"},{"key":"1103_CR147","doi-asserted-by":"crossref","unstructured":"Zhao, Y., & Zhu, S. C. (2013). Scene parsing by integrating function, geometry and appearance models. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2013.401"},{"issue":"2","key":"1103_CR148","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1007\/s11263-014-0795-4","volume":"112","author":"B Zheng","year":"2015","unstructured":"Zheng, B., Zhao, Y., Yu, J., Ikeuchi, K., & Zhu, S. C. (2015). Scene understanding by reasoning stability and safety. International Journal of Computer Vision (IJCV), 112(2), 221\u2013238.","journal-title":"International Journal of Computer Vision (IJCV)"},{"key":"1103_CR149","doi-asserted-by":"crossref","unstructured":"Zheng, B., Zhao, Y., Yu, J. C., Ikeuchi, K., & Zhu, S. C. (2013). Beyond point clouds: Scene understanding by reasoning geometry and physics. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2013.402"},{"key":"1103_CR150","doi-asserted-by":"crossref","unstructured":"Zhou, T., Kr\u00e4henb\u00fchl, P., Aubry, M., Huang, Q., & Efros, A. A. (2016). Learning dense correspondence via 3D-guided cycle consistency. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1007\/978-3-662-49373-1"},{"key":"1103_CR151","doi-asserted-by":"crossref","unstructured":"Zhou, X., Zhu, M., Leonardos, S., Derpanis, K. G., & Daniilidis, K. (2016). Sparseness meets deepness: 3D human pose estimation from monocular video. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1007\/978-3-662-49373-1"},{"key":"1103_CR152","volume-title":"A stochastic grammar of images","author":"SC Zhu","year":"2007","unstructured":"Zhu, S. C., & Mumford, D. (2007). A stochastic grammar of images. Breda: Now Publishers Inc."},{"key":"1103_CR153","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Fathi, A., & Fei-Fei, L. (2014). Reasoning about object affordances in a knowledge base representation. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-10605-2_27"},{"key":"1103_CR154","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Jiang, C., Zhao, Y., Terzopoulos, D., & Zhu, S. C. (2016). Inferring forces and learning human utilities from videos. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1007\/978-3-662-49373-1"},{"key":"1103_CR155","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Mottaghi, R., Kolve, E., Lim, J. J., Gupta, A., Fei-Fei, L., & Farhadi, A. (2017). Target-driven visual navigation in indoor scenes using deep reinforcement learning. In International conference on robotics and automation (ICRA).","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"1103_CR156","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Zhao, Y., & Zhu, S. C. (2015). Understanding tools: Task-oriented object modeling, learning and recognition. In Conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298903"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-018-1103-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-018-1103-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-018-1103-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,3]],"date-time":"2023-09-03T12:49:02Z","timestamp":1693745342000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-018-1103-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,6,30]]},"references-count":156,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2018,9]]}},"alternative-id":["1103"],"URL":"https:\/\/doi.org\/10.1007\/s11263-018-1103-5","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2018,6,30]]},"assertion":[{"value":"30 July 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 June 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 June 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}