{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T14:39:29Z","timestamp":1774276769107,"version":"3.50.1"},"reference-count":86,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2020,6,1]],"date-time":"2020-06-01T00:00:00Z","timestamp":1590969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,6,1]],"date-time":"2020-06-01T00:00:00Z","timestamp":1590969600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,6,1]],"date-time":"2020-06-01T00:00:00Z","timestamp":1590969600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"American Technologies Corporation"},{"DOI":"10.13039\/100015599","name":"Toyota Research Institute","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100015599","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Robot."],"published-print":{"date-parts":[[2020,6]]},"DOI":"10.1109\/tro.2019.2959445","type":"journal-article","created":{"date-parts":[[2020,3,20]],"date-time":"2020-03-20T20:08:15Z","timestamp":1584734895000},"page":"582-596","source":"Crossref","is-referenced-by-count":191,"title":["Making Sense of Vision and Touch: Learning Multimodal Representations for Contact-Rich Tasks"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9893-3591","authenticated-orcid":false,"given":"Michelle A.","family":"Lee","sequence":"first","affiliation":[]},{"given":"Yuke","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Peter","family":"Zachares","sequence":"additional","affiliation":[]},{"given":"Matthew","family":"Tan","sequence":"additional","affiliation":[]},{"given":"Krishnan","family":"Srinivasan","sequence":"additional","affiliation":[]},{"given":"Silvio","family":"Savarese","sequence":"additional","affiliation":[]},{"given":"Li","family":"Fei-Fei","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0482-4296","authenticated-orcid":false,"given":"Animesh","family":"Garg","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4921-7193","authenticated-orcid":false,"given":"Jeannette","family":"Bohg","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2016.7803371"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913506757"},{"key":"ref71","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref70","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"0","journal-title":"Proc Intl Conf on Learning Representations"},{"key":"ref76","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"CoRR"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-013-9365-9"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2018.2819658"},{"key":"ref39","article-title":"Stochastic variational video prediction","author":"babaeizadeh","year":"0","journal-title":"Proc Intl Conf on Learning Representations"},{"key":"ref75","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref38","first-page":"5074","article-title":"Learning to poke by poking: Experiential learning of intuitive physics","author":"agrawal","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1177\/0278364914559753"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1177\/027836499501400103"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-33950-0_37"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759592"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460875"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.538"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2800101"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2018.07.006"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-28619-4_41"},{"key":"ref60","first-page":"5575","article-title":"Multimodal generative models for scalable weakly-supervised learning","author":"wu","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref62","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref61","article-title":"Joint multimodal learning with deep generative models","author":"suzuki","year":"2016"},{"key":"ref63","article-title":"Auto-encoding variational bayes","author":"kingma","year":"0","journal-title":"Proc Intl Conf on Learning Representations"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-015-9435-2"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2889774"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2015.XI.044"},{"key":"ref65","first-page":"2758","article-title":"Flownet: Learning optical flow with convolutional networks","author":"fischer","year":"0","journal-title":"Proc IEEE Int Conf Comput Vision"},{"key":"ref66","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"0","journal-title":"Proc 3rd Int Conf Learn Representations"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref67","article-title":"Wavenet: A generative model for raw audio","author":"oord","year":"2016","journal-title":"Proc 9th Speech Synthesis Workshop"},{"key":"ref68","author":"thrun","year":"2005","journal-title":"Probabilistic Robotics"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2016.2645124"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/027836499000900603"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1111\/j.0956-7976.2004.00691.x"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759578"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793520"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2015.7363524"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6094878"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794219"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794233"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2852779"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2010.5652967"},{"key":"ref51","article-title":"Multi-modal scene understanding for robotic grasping","author":"bohg","year":"2011"},{"key":"ref59","first-page":"2222","article-title":"Multimodal learning with deep boltzmann machines","author":"srivastava","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2017.2721939"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.2991\/978-94-6239-133-8_25"},{"key":"ref56","author":"edelman","year":"1987","journal-title":"Neural Darwinism The Theory of Neuronal Group Selection"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794285"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01086"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793763"},{"key":"ref52","first-page":"249","article-title":"Learning end-to-end multimodal sensor policies for autonomous navigation","author":"liu","year":"0","journal-title":"Proc Conf Robot Learn"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.009"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793485"},{"key":"ref40","first-page":"2863","article-title":"Action-conditional video prediction using deep networks in atari games","author":"oh","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1177\/027836498700600101"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1115\/1.3149634"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.1989.100031"},{"key":"ref15","first-page":"314","article-title":"The feeling of success: Does touch sensing help predict grasp outcomes?","author":"calandra","year":"0","journal-title":"Proc Conf Robot Learn"},{"key":"ref82","article-title":"From variational to deterministic autoencoders","author":"ghosh","year":"2019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6630999"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28872-7_16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2015.7363558"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968201"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6095096"},{"key":"ref83","first-page":"3195","article-title":"Probabilistic object tracking using a range camera","author":"w\u00fcthrich","year":"0","journal-title":"Proc IEEE\/RSJ Int Conf Intell Robots Syst"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989326"},{"key":"ref80","first-page":"496","article-title":"The chai libraries","author":"conti","year":"0","journal-title":"Proc Eurohaptics"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7354090"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2011.2162271"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aav3123"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2014.6943202"},{"key":"ref85","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"0","journal-title":"Proc 3rd Int Conf Learn Representations"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989384"},{"key":"ref86","article-title":"Pytorch implementations of reinforcement learning algorithms","author":"kostrikov","year":"2018"},{"key":"ref7","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593430"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989324"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206165"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907696"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8594077"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794048"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_39"},{"key":"ref41","first-page":"689","article-title":"Multimodal deep learning","author":"ngiam","year":"0","journal-title":"Proc 28th Int Conf Mach Learn"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487176"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.264"}],"container-title":["IEEE Transactions on Robotics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8860\/9109403\/09043710.pdf?arnumber=9043710","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T16:48:21Z","timestamp":1651078101000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9043710\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6]]},"references-count":86,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tro.2019.2959445","relation":{},"ISSN":["1552-3098","1941-0468"],"issn-type":[{"value":"1552-3098","type":"print"},{"value":"1941-0468","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,6]]}}}