{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:40:03Z","timestamp":1766061603980,"version":"3.48.0"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11245844","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"701-707","source":"Crossref","is-referenced-by-count":0,"title":["Stimulating Imagination: Towards General-purpose \"Something Something Placement\""],"prefix":"10.1109","author":[{"given":"Jianyang","family":"Wu","sequence":"first","affiliation":[{"name":"Rightly Robotics,Hangzhou,China"}]},{"given":"Jie","family":"Gu","sequence":"additional","affiliation":[{"name":"Rightly Robotics,Hangzhou,China"}]},{"given":"Xiaokang","family":"Ma","sequence":"additional","affiliation":[{"name":"Rightly Robotics,Hangzhou,China"}]},{"given":"Fangzhou","family":"Qiu","sequence":"additional","affiliation":[{"name":"Rightly Robotics,Hangzhou,China"}]},{"given":"Chu","family":"Tang","sequence":"additional","affiliation":[{"name":"Rightly Robotics,Hangzhou,China"}]},{"given":"Jingmin","family":"Chen","sequence":"additional","affiliation":[{"name":"Rightly Robotics,Hangzhou,China"}]}],"member":"263","reference":[{"article-title":"RT-2: vision-language-action models transfer web knowledge to robotic control","year":"2023","author":"Brohan","key":"ref1"},{"article-title":"Zero-shot robotic manipulation with pretrained image-editing diffusion models","year":"2023","author":"Black","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.010"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.027"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811931"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.031"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.00915"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref9","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Adv. Neural Inf. Process. Syst","author":"Ho"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197472"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560895"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340925"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981832"},{"key":"ref14","first-page":"148","article-title":"Sornet: Spatial object-centric representations for sequential manipulation","volume-title":"Proc. Conf. Robot Learning","author":"Yuan"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2023.3272516\/mm1"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/icra57147.2024.10611220"},{"article-title":"Transporter networks: Rearranging the visual world for robotic manipulation","volume-title":"Proc. Conf. Robot Learning","author":"Zeng","key":"ref17"},{"key":"ref18","first-page":"894","article-title":"Cliport: What and where pathways for robotic manipulation","volume-title":"Proc. Conf. Robot Learning","author":"Shridhar"},{"key":"ref19","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Machine Learning","volume":"139","author":"Radford"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.026"},{"article-title":"VIMA: general robot manipulation with multimodal prompts","year":"2022","author":"Jiang","key":"ref21"},{"year":"2023","key":"ref22","article-title":"GPT-4 technical report"},{"key":"ref23","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst","author":"Brown"},{"article-title":"Llama 2: Open foundation and fine-tuned chat models","year":"2023","author":"Touvron","key":"ref24"},{"key":"ref25","first-page":"34892","article-title":"Visual instruction tuning","volume-title":"Proc. Adv. Neural Inf. Process. Syst","author":"Liu"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/vl\/N19-142"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-021-0229-5"},{"first-page":"2016","article-title":"Pybullet, a python module for physics simulation for games, robotics and machine learning","author":"Coumans","key":"ref31"},{"key":"ref32","article-title":"OpenGL"},{"article-title":"Shapenet: An information-rich 3d model repository","year":"2015","author":"Chang","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2015.7301289"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.915"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_21"},{"article-title":"Learning to act from actionless videos through dense correspondences","year":"2023","author":"Ko","key":"ref38"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01146"},{"key":"ref42","article-title":"MoveIt"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2025,10,19]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11245844.pdf?arnumber=11245844","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:35:23Z","timestamp":1766061323000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11245844\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11245844","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}