{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T13:17:26Z","timestamp":1771679846321,"version":"3.50.1"},"reference-count":56,"publisher":"American Association for the Advancement of Science (AAAS)","issue":"30","funder":[{"DOI":"10.13039\/100004359","name":"Sony Electronics Inc.","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004359","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006502","name":"Defense Sciences Office, DARPA","doi-asserted-by":"publisher","award":["FA8750-18-2-0126"],"award-info":[{"award-number":["FA8750-18-2-0126"]}],"id":[{"id":"10.13039\/100006502","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Sci. Robot."],"published-print":{"date-parts":[[2019,5,22]]},"abstract":"<jats:p>A robotic agent learns how to look around novel environments intelligently by directing the camera to best complete its observations.<\/jats:p>","DOI":"10.1126\/scirobotics.aaw6326","type":"journal-article","created":{"date-parts":[[2019,5,15]],"date-time":"2019-05-15T23:17:52Z","timestamp":1557962272000},"source":"Crossref","is-referenced-by-count":29,"title":["Emergence of exploratory look-around behaviors through active observation completion"],"prefix":"10.1126","volume":"4","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2833-7038","authenticated-orcid":true,"given":"Santhosh K.","family":"Ramakrishnan","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of Texas at Austin, Austin, TX, USA."},{"name":"Facebook AI Research, Austin, TX, USA."}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6888-3095","authenticated-orcid":true,"given":"Dinesh","family":"Jayaraman","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering and Computer Science, University of California, Berkeley, Berkeley, CA, USA."}]},{"given":"Kristen","family":"Grauman","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Texas at Austin, Austin, TX, USA."},{"name":"Facebook AI Research, Austin, TX, USA."}]}],"member":"221","reference":[{"key":"e_1_3_2_2_2","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","article-title":"ImageNet large scale visual recognition challenge","volume":"115","author":"Russakovsky O.","year":"2015","unstructured":"O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh, S. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein, A. C. Berg, L. Fei-Fei, ImageNet large scale visual recognition challenge. Int. J. Comp. Vis. 115, 211\u2013252 (2015).","journal-title":"Int. J. Comp. Vis."},{"key":"e_1_3_2_3_2","first-page":"740","article-title":"Microsoft COCO: Common objects in context","volume":"8693","author":"Lin T.-Y.","year":"2014","unstructured":"T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan, P. Doll\u00e1r, C. L. Zitnick, Microsoft COCO: Common objects in context. Eur. Conf. Comp. Vis. 8693, 740\u2013755 (2014).","journal-title":"Eur. Conf. Comp. Vis."},{"key":"e_1_3_2_4_2","unstructured":"K. Soomro A. R. Zamir M. Shah Ucf101: A dataset of 101 human actions classes from videos in the wild. arXiv:1212.0402 [cs.CV] (3 December 2012)."},{"key":"e_1_3_2_5_2","doi-asserted-by":"crossref","first-page":"1230","DOI":"10.1111\/j.1467-8624.2008.01185.x","article-title":"Development of three-dimensional object completion in infancy","volume":"79","author":"Soska K. C.","year":"2008","unstructured":"K. C. Soska, S. P. Johnson, Development of three-dimensional object completion in infancy. Child Dev. 79, 1230\u20131236 (2008).","journal-title":"Child Dev."},{"key":"e_1_3_2_6_2","doi-asserted-by":"crossref","first-page":"129","DOI":"10.1037\/a0014618","article-title":"Systems in development: Motor skill acquisition facilitates three-dimensional object completion","volume":"46","author":"Soska K. C.","year":"2010","unstructured":"K. C. Soska, K. E. Adolph, S. P. Johnson, Systems in development: Motor skill acquisition facilitates three-dimensional object completion. Dev. Psychol. 46, 129\u2013138 (2010).","journal-title":"Dev. Psychol."},{"key":"e_1_3_2_7_2","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1016\/0010-0285(83)90017-8","article-title":"Perception of partly occluded objects in infancy","volume":"15","author":"Kellman P. J.","year":"1983","unstructured":"P. J. Kellman, E. S. Spelke, Perception of partly occluded objects in infancy. Cogn. Psychol. 15, 483\u2013524 (1983).","journal-title":"Cogn. Psychol."},{"key":"e_1_3_2_8_2","doi-asserted-by":"crossref","first-page":"766","DOI":"10.1037\/0033-295X.113.4.766","article-title":"Contextual guidance of eye movements and attention in real-world scenes: The role of global features in object search","volume":"113","author":"Torralba A.","year":"2006","unstructured":"A. Torralba, A. Oliva, M. S. Castelhano, J. M. Henderson, Contextual guidance of eye movements and attention in real-world scenes: The role of global features in object search. Psychol. Rev. 113, 766\u2013786 (2006).","journal-title":"Psychol. Rev."},{"key":"e_1_3_2_9_2","doi-asserted-by":"crossref","unstructured":"D. Jayaraman K. Grauman Look-ahead before you leap: End-to-end active recognition by forecasting the effect of motion in European Conference on Computer Vision (Springer 2016).","DOI":"10.1007\/978-3-319-46454-1_30"},{"key":"e_1_3_2_10_2","doi-asserted-by":"crossref","unstructured":"M. Malmir K. Sikka D. Forster J. R. Movellan G. Cottrell Deep Q-learning for active recognition of GERMS: Baseline performance on a standardized dataset for active learning. British Machine Vision Conference (BMVA 2015).","DOI":"10.5244\/C.29.161"},{"key":"e_1_3_2_11_2","unstructured":"Z. Wu S. Song A. Khosla F. Yu L. Zhang X. Tang J. Xiao 3D ShapeNets: A deep representation for volumetric shapes in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2015)."},{"key":"e_1_3_2_12_2","doi-asserted-by":"crossref","unstructured":"P. Ammirato P. Poirson E. Park J. Ko\u0161eck\u00e1 A. C. Berg A dataset for developing and benchmarking active vision in IEEE International Conference on Robotics and Automation (IEEE 2017).","DOI":"10.1109\/ICRA.2017.7989164"},{"key":"e_1_3_2_13_2","doi-asserted-by":"crossref","unstructured":"S. Yeung O. Russakovsky G. Mori L. Fei-Fei End-to-end learning of action detection from frame glimpses in videos in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2016).","DOI":"10.1109\/CVPR.2016.293"},{"key":"e_1_3_2_14_2","doi-asserted-by":"crossref","unstructured":"S. Mathe A. Pirinen C. Sminchisescu Reinforcement learning for visual object detection in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2016).","DOI":"10.1109\/CVPR.2016.316"},{"key":"e_1_3_2_15_2","unstructured":"S. Karayev T. Baumgartner M. Fritz T. Darrell Timely object recognition in Advances in Neural Information Processing Systems (Curran Associates Inc. 2012)."},{"key":"e_1_3_2_16_2","doi-asserted-by":"crossref","unstructured":"D. Pathak P. Agrawal A. A. Efros T. Darrell Curiosity-driven exploration by self-supervised prediction in International Conference on Machine Learning (PMLR 2017).","DOI":"10.1109\/CVPRW.2017.70"},{"key":"e_1_3_2_17_2","unstructured":"T. Chen S. Gupta A. Gupta Learning exploration policies for navigation in International Conference on Learning Representations (2019)."},{"key":"e_1_3_2_18_2","doi-asserted-by":"crossref","unstructured":"B. Hepp D. Dey S. N. Sinha A. Kapoor N. Joshi O. Hilliges Learn-to-score: Efficient 3D scene exploration by predicting view utility in European Conference on Computer Vision (Springer 2018).","DOI":"10.1007\/978-3-030-01267-0_27"},{"key":"e_1_3_2_19_2","doi-asserted-by":"crossref","unstructured":"S. Song A. Zeng A. X. Chang M. Savva S. Savarese T. Funkhouser Im2pano3D: Extrapolating 360\u00b0 structure and semantics beyond the field of view in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2018).","DOI":"10.1109\/CVPR.2018.00405"},{"key":"e_1_3_2_20_2","doi-asserted-by":"crossref","unstructured":"D. Ji J. Kwon M. McFarland S. Savarese Deep view morphing in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2017).","DOI":"10.1109\/CVPR.2017.750"},{"key":"e_1_3_2_21_2","unstructured":"T. D. Kulkarni W. Whitney P. Kohli J. B. Tenenbaum Deep convolutional inverse graphics network in Advances in Neural Information Processing Systems (Curran Associates Inc. 2015)."},{"key":"e_1_3_2_22_2","doi-asserted-by":"crossref","unstructured":"D. Jayaraman R. Gao K. Grauman ShapeCodes: Self-supervised feature learning by lifting views to viewgrids in European Conference on Computer Vision (Springer 2018).","DOI":"10.1007\/978-3-030-01270-0_8"},{"key":"e_1_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6170"},{"key":"e_1_3_2_24_2","doi-asserted-by":"crossref","unstructured":"D. Jayaraman K. Grauman Learning to look around: Intelligently exploring unseen environments for unknown tasks in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2018).","DOI":"10.1109\/CVPR.2018.00135"},{"key":"e_1_3_2_25_2","doi-asserted-by":"crossref","unstructured":"S. K. Ramakrishnan K. Grauman Sidekick policy learning for active visual exploration in European Conference on Computer Vision (Springer 2018).","DOI":"10.1007\/978-3-030-01258-8_26"},{"key":"e_1_3_2_26_2","unstructured":"For simplicity of presentation we represent an \u201cenvironment\u201d as X where the agent explores a novel scene looking outward in new viewing directions. However experiments will also use X as an object where the agent moves around an object looking inward at it from new viewing angles. Figure 1 illustrates the two scenarios."},{"key":"e_1_3_2_27_2","doi-asserted-by":"crossref","unstructured":"E. Johns S. Leutenegger A. J. Davison Pairwise decomposition of image sequences for active multi-view recognition in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2016).","DOI":"10.1109\/CVPR.2016.414"},{"key":"e_1_3_2_28_2","doi-asserted-by":"crossref","unstructured":"Y. Zhu D. Gordon E. Kolve D. Fox L. Fei-Fei A. Gupta R. Mottaghi A. Farhadi Visual semantic planning using deep successor representations in IEEE International Conference on Computer Vision (IEEE 2017).","DOI":"10.1109\/ICCV.2017.60"},{"key":"e_1_3_2_29_2","unstructured":"S. Gupta D. Fouhey S. Levine J. Malik Unifying map and landmark based representations for visual navigation. arXiv:1712.08125 [cs.CV] (21 December 2017)."},{"key":"e_1_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Y. Zhu R. Mottaghi E. Kolve J. J. Lim A. Gupta L. Fei-Fei A. Farhadi Target-driven visual navigation in indoor scenes using deep reinforcement learning in IEEE International Conference on Robotics and Automation (IEEE 2017).","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"e_1_3_2_31_2","article-title":"End-to-end policy learning for active visual categorization","author":"Jayaraman D.","year":"2018","unstructured":"D. Jayaraman, K. Grauman, End-to-end policy learning for active visual categorization. IEEE Trans. Pattern Anal. Mach. Intell. (2018).","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"e_1_3_2_32_2","unstructured":"X. Guo S. Singh H. Lee R. Lewis X. Wang Deep learning for real-time Atari game play using offline Monte-Carlo tree search planning in Advances in Neural Information Processing Systems (Curran Associates Inc. 2014)."},{"key":"e_1_3_2_33_2","doi-asserted-by":"crossref","unstructured":"V. Vapnik R. Izmailov Learning with intelligent teacher in Symposium on Conformal and Probabilistic Prediction with Applications (Springer 2016).","DOI":"10.1007\/978-3-319-33395-3_1"},{"key":"e_1_3_2_34_2","unstructured":"J. Xiao K. A. Ehinger A. Oliva A. Torralba Recognizing scene viewpoint using panoramic place representation in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2012)."},{"key":"e_1_3_2_35_2","unstructured":"The angles were selected to break symmetry and reduce redundancy of views."},{"key":"e_1_3_2_36_2","unstructured":"For the sake of brevity we report the best performances among the two sidekick variants we proposed in ( 24 )."},{"key":"e_1_3_2_37_2","doi-asserted-by":"crossref","unstructured":"J. Harel C. Koch P. Perona Graph-based visual saliency in Advances in Neural Information Processing Systems (MIT Press 2006).","DOI":"10.7551\/mitpress\/7503.003.0073"},{"key":"e_1_3_2_38_2","unstructured":"We refine the decoded viewgrids (for both our method and the baseline) with a pix2pix ( 52 )\u2013style conditional generative adversarial network (GAN) detailed in the Supplementary Materials."},{"key":"e_1_3_2_39_2","doi-asserted-by":"crossref","unstructured":"C. B. Choy D. Xu J. Gwak K. Chen S. Savarese 3D-R2N2: A unified approach for single and multi-view 3D object reconstruction in Proceedings of the European Conference on Computer Vision (ECCV) (Springer 2016).","DOI":"10.1007\/978-3-319-46484-8_38"},{"key":"e_1_3_2_40_2","doi-asserted-by":"crossref","unstructured":"H. Fan H. Su L. Guibas A point set generation network for 3D object reconstruction from a single image in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2017).","DOI":"10.1109\/CVPR.2017.264"},{"key":"e_1_3_2_41_2","doi-asserted-by":"crossref","unstructured":"N. Wang Y. Zhang Z. Li Y. Fu W. Liu Y.-G. Jiang Pixel2mesh: Generating 3D mesh models from single RGB images. arXiv:1804.01654 [cs.CV] (5 April 2018).","DOI":"10.1007\/978-3-030-01252-6_4"},{"key":"e_1_3_2_42_2","unstructured":"A. Dosovitskiy G. Ros F. Codevilla A. Lopez V. Koltun CARLA: An open urban driving simulator in Conference on Robot Learning (PMLR 2017)."},{"key":"e_1_3_2_43_2","doi-asserted-by":"crossref","unstructured":"L. Pinto M. Andrychowicz P. Welinder W. Zaremba P. Abbeel Asymmetric actor critic for image-based robot learning in Robotics: Science and Systems (Robotics Proceedings 2018).","DOI":"10.15607\/RSS.2018.XIV.008"},{"key":"e_1_3_2_44_2","doi-asserted-by":"crossref","unstructured":"A. Das S. Datta G. Gkioxari S. Lee D. Parikh D. Batra Embodied question answering in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2018).","DOI":"10.1109\/CVPR.2018.00008"},{"key":"e_1_3_2_45_2","first-page":"525","article-title":"SphereNet: Learning spherical representations for detection and classification in omnidirectional images","volume":"11213","author":"Coors B.","year":"2018","unstructured":"B. Coors, A. P. Condurache, A. Geiger, SphereNet: Learning spherical representations for detection and classification in omnidirectional images. Proc. Eur. Conf. Comput. Vis. 11213, 525\u2013541 (2018).","journal-title":"Proc. Eur. Conf. Comput. Vis."},{"key":"e_1_3_2_46_2","unstructured":"Y. Wu Y. Wu G. Gkioxari Y. Tian Building generalizable agents with a realistic and rich 3D environment. arXiv:1801.02209 [cs.LG] (7 January 2018)."},{"key":"e_1_3_2_47_2","doi-asserted-by":"crossref","unstructured":"P. Anderson Q. Wu D. Teney J. Bruce M. Johnson N. S\u00fcnderhauf I. Reid S. Gould A. van den Hengel Vision-and-language navigation: Interpreting visually-grounded navigation instructions in real environments in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2018).","DOI":"10.1109\/CVPR.2018.00387"},{"key":"e_1_3_2_48_2","unstructured":"N. Savinov A. Dosovitskiy V. Koltun Semi-parametric topological memory for navigation in International Conference on Learning Representations (2018)."},{"key":"e_1_3_2_49_2","unstructured":"D. Ha J. Schmidhuber World models. arXiv:1803.10122 [cs.LG] (27 March 2018)."},{"key":"e_1_3_2_50_2","doi-asserted-by":"crossref","unstructured":"A. J. Piergiovanni A. Wu M. S. Ryoo Learning real-world robot policies by dreaming. arXiv:1805.07813 [cs.RO] (20 May 2018).","DOI":"10.1109\/IROS40897.2019.8967559"},{"key":"e_1_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_52_2","unstructured":"R. S. Sutton A. G. Barto Reinforcement Learning: An Introduction (MIT Press 2014)."},{"key":"e_1_3_2_53_2","doi-asserted-by":"crossref","unstructured":"P. Isola J.-Y. Zhu T. Zhou A. A. Efros Image-to-image translation with conditional adversarial networks in IEEE Conference on Computer Vision and Pattern Recognition (IEEE 2017).","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_2_54_2","unstructured":"M. Bojarski D. Del Testa D. Dworakowski B. Firner B. Flepp P. Goyal L. D. Jackel M. Monfort U. Muller J. Zhang X. Zhang J. Zhao K. Zieba End to end learning for self-driving cars. arXiv:1604.07316 [cs.CV] (25 April 2016)."},{"key":"e_1_3_2_55_2","doi-asserted-by":"crossref","first-page":"661","DOI":"10.1109\/LRA.2015.2509024","article-title":"A machine learning approach to visual perception of forest trails for mobile robots","volume":"1","author":"Giusti A.","year":"2016","unstructured":"A. Giusti, J. Guzzi, D. C. Cire\u015fan, F.-L. He, J. P. Rodr\u00edguez, F. Fontana, M. Faessler, C. Forster, J. Schmidhuber, G. Di Caro, D. Scaramuzza, L. M. Gambardella, A machine learning approach to visual perception of forest trails for mobile robots. IEEE Robot. Autom. Lett. 1, 661\u2013667 (2016).","journal-title":"IEEE Robot. Autom. Lett."},{"key":"e_1_3_2_56_2","unstructured":"Y. Duan M. Andrychowicz B. C. Stadie J. Ho J. Schneider I. Sutskever P. Abbeel W. Zaremba One-shot imitation learning in Advances in Neural Information Processing Systems (Curran Associates Inc. 2017)."},{"key":"e_1_3_2_57_2","unstructured":"I. Goodfellow J. Pouget-Abadie M. Mirza B. Xu D. Warde-Farley S. Ozair A. Courville Y. Bengio Generative adversarial nets in Advances in Neural Information Processing Systems (Curran Associates Inc. 2014)."}],"container-title":["Science Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/syndication.highwire.org\/content\/doi\/10.1126\/scirobotics.aaw6326","content-type":"unspecified","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/www.science.org\/doi\/pdf\/10.1126\/scirobotics.aaw6326","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,16]],"date-time":"2024-01-16T12:06:56Z","timestamp":1705406816000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.science.org\/doi\/10.1126\/scirobotics.aaw6326"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5,22]]},"references-count":56,"journal-issue":{"issue":"30","published-print":{"date-parts":[[2019,5,22]]}},"alternative-id":["10.1126\/scirobotics.aaw6326"],"URL":"https:\/\/doi.org\/10.1126\/scirobotics.aaw6326","relation":{},"ISSN":["2470-9476"],"issn-type":[{"value":"2470-9476","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,5,22]]},"article-number":"eaaw6326"}}