{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:57:43Z","timestamp":1781539063674,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":15,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810612","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1573-1577","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Joint-Guided Spatial and Semantic Sensitive Diffusion Policy for Robotic Manipulation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1559-0344","authenticated-orcid":false,"given":"Hongda","family":"Zhang","sequence":"first","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4285-3573","authenticated-orcid":false,"given":"Siao","family":"Liu","sequence":"additional","affiliation":[{"name":"Soochow University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2221-2998","authenticated-orcid":false,"given":"Yi","family":"Liu","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6249-4005","authenticated-orcid":false,"given":"Chun","family":"Ouyang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2709-6148","authenticated-orcid":false,"given":"Zhongxue","family":"Gan","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Cheng Chi Zhenjia Xu Siyuan Feng Eric Cousineau Yilun Du Benjamin Burchfiel Russ Tedrake and Shuran Song. 2025. Diffusion Policy: Visuomotor Policy Learning via Action Diffusion. The International Journal of Robotics Research 44 10\u201311 (2025) 1684\u20131704.","DOI":"10.1177\/02783649241273668"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_3_1_4_2","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. Advances in Neural Information Processing Systems 33 (2020) 6840\u20136851."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00292"},{"key":"e_1_3_3_1_6_2","series-title":"Proceedings of Machine Learning Research","first-page":"2679","volume-title":"Proceedings of The 8th Conference on Robot Learning","volume":"270","author":"Kim Moo\u00a0Jin","year":"2025","unstructured":"Moo\u00a0Jin Kim, Karl Pertsch, Siddharth Karamcheti, Ted Xiao, Ashwin Balakrishna, Suraj Nair, Rafael Rafailov, Ethan\u00a0P. Foster, Pannag\u00a0R. Sanketi, Quan Vuong, Thomas Kollar, Benjamin Burchfiel, Russ Tedrake, Dorsa Sadigh, Sergey Levine, Percy Liang, and Chelsea Finn. 2025. OpenVLA: An Open-Source Vision-Language-Action Model. In Proceedings of The 8th Conference on Robot Learning(Proceedings of Machine Learning Research, Vol.\u00a0270). PMLR, Munich, Germany, 2679\u20132713. https:\/\/proceedings.mlr.press\/v270\/kim25c.html"},{"key":"e_1_3_3_1_7_2","series-title":"Proceedings of Machine Learning Research","first-page":"1730","volume-title":"Proceedings of The 8th Conference on Robot Learning","volume":"270","author":"Lee Andrew Choong-Won","year":"2025","unstructured":"Andrew Choong-Won Lee, Ian Chuang, Ling-Yuan Chen, and Iman Soltani. 2025. InterACT: Inter-dependency Aware Action Chunking with Hierarchical Attention Transformers for Bimanual Manipulation. In Proceedings of The 8th Conference on Robot Learning(Proceedings of Machine Learning Research, Vol.\u00a0270). PMLR, Munich, Germany, 1730\u20131743. https:\/\/proceedings.mlr.press\/v270\/lee25a.html"},{"key":"e_1_3_3_1_8_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Li Xinghang","year":"2024","unstructured":"Xinghang Li, Minghuan Liu, Hanbo Zhang, Cunjun Yu, Jie Xu, Hongtao Wu, Chilam Cheang, Ya Jing, Weinan Zhang, Huaping Liu, Hang Li, and Tao Kong. 2024. Vision-Language Foundation Models as Effective Robot Imitators. In The Twelfth International Conference on Learning Representations. Vienna, Austria, 17\u00a0pages. https:\/\/openreview.net\/forum?id=lFYj0oibGR"},{"key":"e_1_3_3_1_9_2","volume-title":"Proceedings of Robotics: Science and Systems","author":"Team Octo Model","year":"2024","unstructured":"Octo Model Team, Dibya Ghosh, Homer\u00a0Rich Walke, Karl Pertsch, Kevin Black, Oier Mees, Sudeep Dasari, Joey Hejna, Tobias Kreiman, Charles Xu, Jianlan Luo, You\u00a0Liang Tan, Lawrence\u00a0Yunliang Chen, Quan Vuong, Ted Xiao, Pannag\u00a0R. Sanketi, Dorsa Sadigh, Chelsea Finn, and Sergey Levine. 2024. Octo: An Open-Source Generalist Robot Policy. In Proceedings of Robotics: Science and Systems. Delft, Netherlands. https:\/\/roboticsconference.org\/2024\/program\/papers\/90\/"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611477"},{"key":"e_1_3_3_1_11_2","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy\u00a0V. Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby Mido Assran Nicolas Ballas Wojciech Galuba Russell Howes Po-Yao Huang Shang-Wen Li Ishan Misra Michael Rabbat Vasu Sharma Gabriel Synnaeve Hu Xu Herv\u00e9 J\u00e9gou Julien Mairal and Patrick Labatut. 2024. DINOv2: Learning Robust Visual Features without Supervision. Transactions on Machine Learning Research (2024). https:\/\/openreview.net\/forum?id=a68SUt6zFt Featured Certification."},{"key":"e_1_3_3_1_12_2","unstructured":"Wenxuan Song Ziyang Zhou Han Zhao Jiayi Chen Pengxiang Ding Haodong Yan Yuxin Huang Feilong Tang Donglin Wang and Haoang Li. 2025. ReconVLA: Reconstructive Vision-Language-Action Model as Effective Robot Perceiver. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2508.10333 (2025)."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10888293"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.016"},{"key":"e_1_3_3_1_16_2","series-title":"Proceedings of Machine Learning Research","first-page":"2165","volume-title":"Proceedings of The 7th Conference on Robot Learning","volume":"229","author":"Zitkovich Brianna","year":"2023","unstructured":"Brianna Zitkovich, Tianhe Yu, Sichun Xu, Peng Xu, Ted Xiao, Fei Xia, Jialin Wu, Paul Wohlhart, Stefan Welker, Ayzaan Wahid, Quan Vuong, Vincent Vanhoucke, Huong Tran, Radu Soricut, Anikait Singh, Jaspiar Singh, Pierre Sermanet, Pannag\u00a0R. Sanketi, Grecia Salazar, Michael\u00a0S. Ryoo, Alex Joshi, Brian Irpan, Alex Ichter, Jasmine Hsu, Alexander Herzog, Karol Hausman, Keerthana Gopalakrishnan, Chuyuan Fu, Pete Florence, Chelsea Finn, Kumar\u00a0Avinava Dubey, Danny Driess, Tianli Ding, Krzysztof\u00a0Marcin Choromanski, Xi Chen, Yevgen Chebotar, Justice Carbajal, Noah Brown, Anthony Brohan, Montserrat\u00a0Gonzalez Arenas, and Kehang Han. 2023. RT-2: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control. In Proceedings of The 7th Conference on Robot Learning(Proceedings of Machine Learning Research, Vol.\u00a0229). PMLR, 2165\u20132183. https:\/\/proceedings.mlr.press\/v229\/zitkovich23a.html"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:44:25Z","timestamp":1781538265000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810612"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":15,"alternative-id":["10.1145\/3805622.3810612","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810612","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}