{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T02:57:01Z","timestamp":1769741821319,"version":"3.49.0"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319464749","type":"print"},{"value":"9783319464756","type":"electronic"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-46475-6_1","type":"book-chapter","created":{"date-parts":[[2016,9,16]],"date-time":"2016-09-16T08:48:10Z","timestamp":1474015690000},"page":"3-18","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":58,"title":["The Curious Robot: Learning Visual Representations via Physical Interactions"],"prefix":"10.1007","author":[{"given":"Lerrel","family":"Pinto","sequence":"first","affiliation":[]},{"given":"Dhiraj","family":"Gandhi","sequence":"additional","affiliation":[]},{"given":"Yuanfeng","family":"Han","sequence":"additional","affiliation":[]},{"given":"Yong-Lae","family":"Park","sequence":"additional","affiliation":[]},{"given":"Abhinav","family":"Gupta","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,9,17]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Doersch, C., Gupta, A., Efros, A.A.: Unsupervised visual representation learning by context prediction. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1422\u20131430 (2015)","DOI":"10.1109\/ICCV.2015.167"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"Wang, X., Gupta, A.: Unsupervised learning of visual representations using videos. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2794\u20132802 (2015)","DOI":"10.1109\/ICCV.2015.320"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Jayaraman, D., Grauman, K.: Learning image representations tied to ego-motion. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1413\u20131421 (2015)","DOI":"10.1109\/ICCV.2015.166"},{"key":"1_CR4","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes (2013). arXiv preprint arXiv:1312.6114"},{"key":"1_CR5","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. In: Advances in Neural Information Processing Systems, pp. 2672\u20132680 (2014)"},{"key":"1_CR6","unstructured":"Denton, E.L., Chintala, S., Fergus, R., et al.: Deep generative image models using Laplacian pyramid of adversarial networks. In: Advances in Neural Information Processing Systems, pp. 1486\u20131494 (2015)"},{"key":"1_CR7","unstructured":"Radford, A., Metz, L., Chintala, S.: Unsupervised representation learning with deep convolutional generative adversarial networks (2015). arXiv preprint arXiv:1511.06434"},{"key":"1_CR8","unstructured":"Salakhutdinov, R., Hinton, G.E.: Deep Boltzmann machines. In: International Conference on Artificial Intelligence and Statistics, pp. 448\u2013455 (2009)"},{"key":"1_CR9","first-page":"153","volume":"19","author":"Y Bengio","year":"2007","unstructured":"Bengio, Y., Lamblin, P., Popovici, D., Larochelle, H., et al.: Greedy layer-wise training of deep networks. Adv. Neural Inf. Process. Syst. 19, 153 (2007)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1_CR10","doi-asserted-by":"crossref","first-page":"318","DOI":"10.1007\/978-3-319-46493-0_20","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Xiaolong Wang","year":"2016","unstructured":"Wang, X., Gupta, A.: Generative image modeling using style and structure adversarial networks. In: ECCV (2016)"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Agrawal, P., Carreira, J., Malik, J.: Learning to see by moving. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 37\u201345 (2015)","DOI":"10.1109\/ICCV.2015.13"},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"Mobahi, H., Collobert, R., Weston, J.: Deep learning from temporal coherence in video. In: Proceedings of the 26th Annual International Conference on Machine Learning, pp. 737\u2013744. ACM (2009)","DOI":"10.1145\/1553374.1553469"},{"key":"1_CR13","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. In: Advances in neural information processing systems, pp. 2366\u20132374 (2014)"},{"key":"1_CR14","doi-asserted-by":"crossref","unstructured":"Wang, X., Fouhey, D., Gupta, A.: Designing deep networks for surface normal estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 539\u2013547 (2015)","DOI":"10.1109\/CVPR.2015.7298652"},{"key":"1_CR15","doi-asserted-by":"crossref","unstructured":"Walker, J., Gupta, A., Hebert, M.: Dense optical flow prediction from a static image. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2443\u20132451 (2015)","DOI":"10.1109\/ICCV.2015.281"},{"issue":"5","key":"1_CR16","doi-asserted-by":"publisher","first-page":"872","DOI":"10.1037\/h0040546","volume":"56","author":"R Held","year":"1963","unstructured":"Held, R., Hein, A.: Movement-produced stimulation in the development of visually guided behavior. J. Comp. Physiol. Psychol. 56(5), 872 (1963)","journal-title":"J. Comp. Physiol. Psychol."},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Bicchi, A., Kumar, V.: Robotic grasping and contact: a review. In: ICRA, pp. 348\u2013353, Citeseer (2000)","DOI":"10.1109\/ROBOT.2000.844081"},{"issue":"2","key":"1_CR18","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1109\/TRO.2013.2289018","volume":"30","author":"J Bohg","year":"2014","unstructured":"Bohg, J., Morales, A., Asfour, T., Kragic, D.: Data-driven grasp synthesis a survey. IEEE Trans. Robot. 30(2), 289\u2013309 (2014)","journal-title":"IEEE Trans. Robot."},{"key":"1_CR19","unstructured":"Lenz, I., Lee, H., Saxena, A.: Deep learning for detecting robotic grasps (2013). arXiv preprint arXiv:1301.3592"},{"key":"1_CR20","unstructured":"Morales, A., Chinellato, E., Fagg, A.H., Del Pobil, A.P.: Using experience for assessing grasp reliability. In: IJRR"},{"key":"1_CR21","doi-asserted-by":"crossref","unstructured":"Detry, R., Baseski, E., Popovic, M., Touati, Y., Kruger, N., Kroemer, O., Peters, J., Piater, J.: Learning object-specific grasp affordance densities. In: ICDL (2009)","DOI":"10.1109\/DEVLRN.2009.5175520"},{"issue":"4","key":"1_CR22","first-page":"600","volume":"33","author":"R Paolini","year":"2014","unstructured":"Paolini, R., Rodriguez, A., Srinivasa, S., Mason, M.T.: A data-driven statistical framework for post-grasp manipulation. IJRR 33(4), 600\u2013615 (2014)","journal-title":"IJRR"},{"key":"1_CR23","unstructured":"Pinto, L., Gupta, A.: Supersizing self-supervision: learning to grasp from 50k tries and 700 robot hours (2015). arXiv preprint arXiv:1509.06825"},{"key":"1_CR24","unstructured":"Levine, S., Pastor, P., Krizhevsky, A., Quillen, D.: Learning hand-eye coordination for robotic grasping with deep learning and large-scale data collection (2016). arXiv preprint arXiv:1603.02199"},{"key":"1_CR25","doi-asserted-by":"crossref","unstructured":"Balorda, Z.: Reducing uncertainty of objects by robot pushing. In: Proceedings of 1990 IEEE International Conference on Robotics and Automation, pp. 1051\u20131056. IEEE (1990)","DOI":"10.1109\/ROBOT.1990.126132"},{"key":"1_CR26","doi-asserted-by":"crossref","unstructured":"Balorda, Z.: Automatic planning of robot pushing operations. In: Proceedings of 1993 IEEE International Conference on Robotics and Automation, pp. 732\u2013737. IEEE (1993)","DOI":"10.1109\/ROBOT.1993.292065"},{"issue":"6","key":"1_CR27","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1177\/027836499601500602","volume":"15","author":"KM Lynch","year":"1996","unstructured":"Lynch, K.M., Mason, M.T.: Stable pushing: mechanics, controllability, and planning. Int. J. Robot. Res. 15(6), 533\u2013556 (1996)","journal-title":"Int. J. Robot. Res."},{"key":"1_CR28","doi-asserted-by":"crossref","unstructured":"Dogar, M., Srinivasa, S.: A framework for push-grasping in clutter. In: Robotics: Science and Systems VII (2011)","DOI":"10.15607\/RSS.2011.VII.009"},{"issue":"1","key":"1_CR29","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1177\/027836499301200107","volume":"12","author":"X Yun","year":"1993","unstructured":"Yun, X.: Object handling using two arms without grasping. Int. J. Robot. Res. 12(1), 99\u2013106 (1993)","journal-title":"Int. J. Robot. Res."},{"key":"1_CR30","unstructured":"Zhou, J., Paolini, R., Bagnell, J.A., Mason, M.T.: A convex polynomial force-motion model for planar sliding: Identification and application (2016)"},{"issue":"6","key":"1_CR31","doi-asserted-by":"publisher","first-page":"1319","DOI":"10.1109\/TRO.2009.2032965","volume":"25","author":"YL Park","year":"2009","unstructured":"Park, Y.L., Ryu, S.C., Black, R.J., Chau, K.K., Moslehi, B., Cutkosky, M.R.: Exoskeletal force-sensing end-effectors with embedded optical fiber-bragg-grating sensors. IEEE Trans. Robot. 25(6), 1319\u20131331 (2009)","journal-title":"IEEE Trans. Robot."},{"key":"1_CR32","doi-asserted-by":"crossref","unstructured":"Schneider, A., Sturm, J., Stachniss, C., Reisert, M., Burkhardt, H., Burgard, W.: Object identification with tactile sensors using bag-of-features. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS 2009, pp. 243\u2013248. IEEE (2009)","DOI":"10.1109\/IROS.2009.5354648"},{"issue":"4","key":"1_CR33","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1007\/BF00133571","volume":"1","author":"J Aloimonos","year":"1988","unstructured":"Aloimonos, J., Weiss, I., Bandyopadhyay, A.: Active vision. Int. J. Comput. Vis. 1(4), 333\u2013356 (1988)","journal-title":"Int. J. Comput. Vis."},{"key":"1_CR34","unstructured":"Wu, Z., Song, S., Khosla, A., Yu, F., Zhang, L., Tang, X., Xiao, J.: 3D shapenets: a deep representation for volumetric shapes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1912\u20131920 (2015)"},{"key":"1_CR35","unstructured":"Mahler, J., Pokorny, F.T., Hou, B., Roderick, M., Laskey, M., Aubry, M., Kohlhoff, K., Kroeger, T., Kuffner, J., Goldberg, K.: Dex-Net 1.0: a cloud-based network of 3D objects for robust grasp planning using a multi-armed bandit model with correlated rewards"},{"key":"1_CR36","unstructured":"Redmon, J., Angelova, A.: Real-time grasp detection using convolutional neural networks (2014). arXiv preprint arXiv:1412.3128"},{"key":"1_CR37","unstructured":"Levine, S., Wagener, N., Abbeel, P.: Learning contact-rich manipulation skills with guided policy search (2015). arXiv preprint arXiv:1501.05611"},{"key":"1_CR38","unstructured":"Tzeng, E., Devin, C., Hoffman, J., Finn, C., Peng, X., Levine, S., Saenko, K., Darrell, T.: Towards adapting deep visuomotor representations from simulated to real environments (2015). arXiv preprint arXiv:1511.07111"},{"issue":"117","key":"1_CR39","first-page":"240","volume":"117","author":"C Finn","year":"2015","unstructured":"Finn, C., Tan, X.Y., Duan, Y., Darrell, T., Levine, S., Abbeel, P.: Deep spatial autoencoders for visuomotor learning. Reconstruction 117(117), 240 (2015)","journal-title":"Reconstruction"},{"key":"1_CR40","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: NIPS, pp. 1097\u20131105 (2012)"},{"key":"1_CR41","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: CVPR 2009, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1_CR42","doi-asserted-by":"crossref","unstructured":"Lai, K., Bo, L., Ren, X., Fox, D.: A large-scale hierarchical multi-view RGB-D object dataset. In: 2011 IEEE International Conference on Robotics and Automation (ICRA), pp. 1817\u20131824. IEEE (2011)","DOI":"10.1109\/ICRA.2011.5980382"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2016"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-46475-6_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T19:14:32Z","timestamp":1749582872000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-46475-6_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319464749","9783319464756"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-46475-6_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"17 September 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Amsterdam","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 October 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 October 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.eccv2016.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}