{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,27]],"date-time":"2025-07-27T07:20:26Z","timestamp":1753600826992,"version":"3.37.3"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100011512","name":"National Research Foundation (NRF)","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100011512","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001445","name":"Singapore and DSO National Laboratories","doi-asserted-by":"publisher","award":["AISG2-RP-2020-016"],"award-info":[{"award-number":["AISG2-RP-2020-016"]}],"id":[{"id":"10.13039\/501100001445","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001348","name":"Agency of Science, Technology and Research","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001348","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10160640","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"11546-11553","source":"Crossref","is-referenced-by-count":6,"title":["Differentiable Parsing and Visual Grounding of Natural Language Instructions for Object Placement"],"prefix":"10.1109","author":[{"given":"Zirui","family":"Zhao","sequence":"first","affiliation":[{"name":"National University of Singapore"}]},{"given":"Wee 
Sun","family":"Lee","sequence":"additional","affiliation":[{"name":"National University of Singapore"}]},{"given":"David","family":"Hsu","sequence":"additional","affiliation":[{"name":"National University of Singapore"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Robot-initiated specification repair through grounded language interaction","author":"boteanu","year":"2017","journal-title":"ArXiv Preprint"},{"journal-title":"spaCy2 Natural language understanding with Bloom embeddings convolutional neural networks and incremental parsing","year":"2017","author":"honnibal","key":"ref35"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907841"},{"key":"ref34","first-page":"1263","article-title":"Neural message passing for quantum chemistry","author":"gilmer","year":"2017","journal-title":"Int Conference on Machine Learning"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-101119-071628"},{"key":"ref37","article-title":"Attention is all you need","volume":"30","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-060117-104838"},{"key":"ref36","first-page":"11 525","article-title":"Object-centric learning with slot attention","volume":"33","author":"locatello","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref31","article-title":"A universal part-of-speech tagset","author":"petrov","year":"2011","journal-title":"ArXiv Preprint"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-02131-2"},{"key":"ref11","first-page":"2649","article-title":"A model for verifiable grounding and execution of complex natural language instructions","author":"boteanu","year":"2016","journal-title":"2016 IEEE\/RSJ International Conference on Intelligent Robots and Systems 
(IROS)"},{"key":"ref33","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"radford","year":"2021","journal-title":"Int Conference on Machine Learning"},{"key":"ref10","first-page":"2","article-title":"Sorry dave, i'm afraid i can't do that: Explaining unachievable robot tasks using natural language","author":"raman","year":"2013","journal-title":"Robotics Science and Systems"},{"key":"ref32","first-page":"92","article-title":"Universal dependency annotation for multilingual parsing","author":"mcdonald","year":"2013","journal-title":"Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2 Short Papers)"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919897133"},{"key":"ref1","article-title":"Interactive visual grounding of referring expressions for human-robot interaction","author":"shridhar","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref17","first-page":"2702","article-title":"Discriminative embeddings of latent variable models for structured data","author":"dai","year":"2016","journal-title":"Int Conference on Machine Learning"},{"key":"ref39","article-title":"Space: Unsupervised object-oriented scene representation via spatial attention and decomposition","author":"lin","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.039"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/MAES.2010.5546308"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT.2006.261910"},{"key":"ref24","first-page":"56","article-title":"Multiplicative gaussian particle filter","author":"su","year":"2020","journal-title":"International Conference on Artificial Intelligence and 
Statistics"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2003.1211409"},{"key":"ref45","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1037"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-2812"},{"key":"ref20","first-page":"169","article-title":"Particle filter networks with application to visual localization","author":"karkus","year":"2018","journal-title":"Conference on Robot Learning"},{"key":"ref42","first-page":"2016","author":"coumans","year":"0","journal-title":"Pybullet a python module for physics simulation for games robotics and machine learning"},{"key":"ref41","article-title":"Deep sets","volume":"30","author":"zaheer","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5952"},{"key":"ref44","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"dosovitskiy","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref21","article-title":"Towards differentiable resampling","author":"zhu","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref43","article-title":"Nvisii: A scriptable tool for photorealistic image generation","author":"morrical","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2017.2754499"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1024"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.2307\/2171786"},{"key":"ref8","article-title":"Language-conditioned imitation learning for robot manipulation tasks","volume":"33","author":"stepputtis","year":"2020","journal-title":"Advances in neural information processing 
systems"},{"key":"ref7","article-title":"Cliport: What and where pathways for robotic manipulation","author":"shridhar","year":"2021","journal-title":"Proceedings of the 5th Conference on Robot Learning (CoRL)"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.047"},{"key":"ref4","first-page":"491","article-title":"Composing pick-and-place tasks by grounding language","author":"mees","year":"2020","journal-title":"International Symposium on Experimental Robotics"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197472"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS47582.2021.9555802"},{"key":"ref5","article-title":"Structformer: Learning spatial structure for language-guided semantic rearrangement of novel objects","author":"liu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref40","volume":"4","author":"bishop","year":"2006","journal-title":"Pattern Recognition and Machine Learning"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2023,5,29]]},"location":"London, United Kingdom","end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10160640.pdf?arnumber=10160640","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T17:37:00Z","timestamp":1690220220000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10160640\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10160640","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}