{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T00:46:34Z","timestamp":1759365994939,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032009852"},{"type":"electronic","value":"9783032009869"}],"license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-00986-9_32","type":"book-chapter","created":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T23:31:21Z","timestamp":1759275081000},"page":"510-523","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Is an\u00a0Object-Centric Representation Beneficial for\u00a0Robotic Manipulation?"],"prefix":"10.1007","author":[{"given":"Alexandre","family":"Chapin","sequence":"first","affiliation":[]},{"given":"Emmanuel","family":"Dellandr\u00e9a","sequence":"additional","affiliation":[]},{"given":"Liming","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,1]]},"reference":[{"key":"32_CR1","unstructured":"Bengio, Y., Courville, A., Vincent, P.: Representation learning: A review and new perspectives (2014). https:\/\/arxiv.org\/abs\/1206.5538"},{"key":"32_CR2","unstructured":"Brohan, A., et\u00a0al.: Rt-1: Robotics transformer for real-world control at scale (2023). https:\/\/arxiv.org\/abs\/2212.06817"},{"key":"32_CR3","unstructured":"Burgess, C.P., et al.: MONET: Unsupervised scene decomposition and representation (2019). https:\/\/arxiv.org\/abs\/1901.11390"},{"key":"32_CR4","unstructured":"Burns, K., Witzel, Z., Hamid, J.I., Yu, T., Finn, C., Hausman, K.: What makes pre-trained visual representations successful for robust manipulation? (2023). https:\/\/arxiv.org\/abs\/2312.12444"},{"key":"32_CR5","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers (2021). https:\/\/arxiv.org\/abs\/2104.14294","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"32_CR6","doi-asserted-by":"crossref","unstructured":"Chi, C., et al.: Diffusion policy: Visuomotor policy learning via action diffusion (2024). https:\/\/arxiv.org\/abs\/2303.04137","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"32_CR7","unstructured":"Collaboration, E., O\u2019Neill, A., et\u00a0al.: Open x-embodiment: Robotic learning datasets and rt-x models (2024). https:\/\/arxiv.org\/abs\/2310.08864"},{"key":"32_CR8","unstructured":"Dalal, M., Mandlekar, A., Garrett, C., Handa, A., Salakhutdinov, R., Fox, D.: Imitating task and motion planning with visuomotor transformers (2023)"},{"key":"32_CR9","unstructured":"Elsayed, G.F., et al.: SAVi++: Towards end-to-end object-centric learning from real-world videos (2022)"},{"key":"32_CR10","doi-asserted-by":"publisher","unstructured":"Ha, D., Schmidhuber, J.: World models (2018). https:\/\/doi.org\/10.5281\/ZENODO.1207631, https:\/\/zenodo.org\/record\/1207631","DOI":"10.5281\/ZENODO.1207631"},{"key":"32_CR11","unstructured":"Hafner, D., Lillicrap, T., Ba, J., Norouzi, M.: Dream to control: Learning behaviors by latent imagination (2020). https:\/\/arxiv.org\/abs\/1912.01603"},{"key":"32_CR12","unstructured":"Haldar, S., Peng, Z., Pinto, L.: BAKU: An efficient transformer for multi-task policy learning (2024). https:\/\/arxiv.org\/abs\/2406.07539"},{"key":"32_CR13","unstructured":"Haramati, D., Daniel, T., Tamar, A.: Entity-centric reinforcement learning for object manipulation from pixels (2024). https:\/\/arxiv.org\/abs\/2404.01220"},{"key":"32_CR14","doi-asserted-by":"crossref","unstructured":"Heravi, N., et al.: Visuomotor control in multi-object scenes using object-aware representations (2023). https:\/\/arxiv.org\/abs\/2205.06333","DOI":"10.1109\/ICRA48891.2023.10160888"},{"key":"32_CR15","unstructured":"Higgins, I., et al.: beta-VAE: learning basic visual concepts with a constrained variational framework. In: International Conference on Learning Representations (2017). https:\/\/openreview.net\/forum?id=Sy2fzU9gl"},{"key":"32_CR16","unstructured":"Hu, Y., Wang, R., Li, L.E., Gao, Y.: For pre-trained vision models in motor control, not all policy learning methods are created equal (2023). https:\/\/arxiv.org\/abs\/2304.04591"},{"key":"32_CR17","unstructured":"Jiang, J., Deng, F., Singh, G., Ahn, S.: Object-centric slot diffusion (2023). https:\/\/arxiv.org\/abs\/2303.10834"},{"key":"32_CR18","unstructured":"Kabra, R., et al.: SIMONe: View-invariant, temporally-abstracted object representations via unsupervised video decomposition (2021). https:\/\/arxiv.org\/abs\/2106.03849"},{"key":"32_CR19","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes (2022). https:\/\/arxiv.org\/abs\/1312.6114"},{"key":"32_CR20","unstructured":"Kipf, T., et al.: Conditional object-centric learning from video (2022). https:\/\/arxiv.org\/abs\/2111.12594"},{"key":"32_CR21","unstructured":"Kroemer, O., Niekum, S., Konidaris, G.: A review of robot learning for manipulation: Challenges, representations, and algorithms (2020). https:\/\/arxiv.org\/abs\/1907.03146"},{"key":"32_CR22","unstructured":"Locatello, F., et al.: Object-centric learning with slot attention (2020). https:\/\/arxiv.org\/abs\/2006.15055"},{"key":"32_CR23","unstructured":"Ma, Y.J., et al.: VIP: Towards universal visual reward and representation via value-implicit pre-training (2023). https:\/\/arxiv.org\/abs\/2210.00030"},{"key":"32_CR24","unstructured":"Majumdar, A., et al.: Where are we in the search for an artificial visual cortex for embodied intelligence? (2024). https:\/\/arxiv.org\/abs\/2303.18240"},{"key":"32_CR25","unstructured":"Mandlekar, A., et al.: What matters in learning from offline human demonstrations for robot manipulation (2021). https:\/\/arxiv.org\/abs\/2108.03298"},{"key":"32_CR26","unstructured":"Nair, S., Rajeswaran, A., Kumar, V., Finn, C., Gupta, A.: R3M: A universal visual representation for robot manipulation (2022). https:\/\/arxiv.org\/abs\/2203.12601"},{"key":"32_CR27","unstructured":"Oquab, M., et al.: DINOv2: Learning robust visual features without supervision (2024). https:\/\/arxiv.org\/abs\/2304.07193"},{"key":"32_CR28","doi-asserted-by":"crossref","unstructured":"Qian, J., Panagopoulos, A., Jayaraman, D.: Recasting generic pretrained vision transformers as object-centric scene encoders for manipulation policies. arXiv preprint arXiv:2405.15916 (2024)","DOI":"10.1109\/ICRA57147.2024.10610131"},{"key":"32_CR29","unstructured":"Radosavovic, I., Xiao, T., James, S., Abbeel, P., Malik, J., Darrell, T.: Real-world robot learning with masked visual pre-training (2022). https:\/\/arxiv.org\/abs\/2210.03109"},{"key":"32_CR30","unstructured":"Seitzer, M., et al.: Bridging the gap to real-world object-centric learning (2023). https:\/\/arxiv.org\/abs\/2209.14860"},{"key":"32_CR31","unstructured":"Shang, J., et al.: Theia: Distilling diverse vision foundation models for robot learning (2024). https:\/\/arxiv.org\/abs\/2407.20179"},{"key":"32_CR32","unstructured":"Singh, G., Deng, F., Ahn, S.: Illiterate DALL-E learns to compose (2022). https:\/\/arxiv.org\/abs\/2110.11405"},{"key":"32_CR33","unstructured":"Singh, G., Wu, Y.F., Ahn, S.: Simple unsupervised object-centric learning for complex and naturalistic videos (2022). https:\/\/arxiv.org\/abs\/2205.14065"},{"key":"32_CR34","doi-asserted-by":"crossref","unstructured":"Tao, S., et al.: ManiSkill3: GPU parallelized robotics simulation and rendering for generalizable embodied AI (2024). https:\/\/arxiv.org\/abs\/2410.00425","DOI":"10.15607\/RSS.2025.XXI.021"},{"key":"32_CR35","unstructured":"Team, O.M., et al.: Octo: An open-source generalist robot policy (2024). https:\/\/arxiv.org\/abs\/2405.12213"},{"key":"32_CR36","unstructured":"Watters, N., Matthey, L., Bosnjak, M., Burgess, C.P., Lerchner, A.: COBRA: Data-efficient model-based RL through unsupervised object discovery and curiosity-driven exploration (2019). https:\/\/arxiv.org\/abs\/1905.09275"},{"key":"32_CR37","unstructured":"Wu, Z., Dvornik, N., Greff, K., Kipf, T., Garg, A.: SlotFormer: Unsupervised visual dynamics simulation with object-centric models (2023). https:\/\/arxiv.org\/abs\/2210.05861"},{"key":"32_CR38","unstructured":"Wu, Z., Hu, J., Lu, W., Gilitschenski, I., Garg, A.: SlotDiffusion: Object-centric generative modeling with diffusion models (2023). https:\/\/arxiv.org\/abs\/2305.11281"},{"key":"32_CR39","doi-asserted-by":"crossref","unstructured":"Xiang, F., et al.: SAPIEN: a simulated part-based interactive environment. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.01111"},{"key":"32_CR40","unstructured":"Yoon, J., Wu, Y.F., Bae, H., Ahn, S.: An investigation into pre-training object-centric representations for reinforcement learning (2023). https:\/\/arxiv.org\/abs\/2302.04419"},{"key":"32_CR41","unstructured":"Zadaianchuk, A., Seitzer, M., Martius, G.: Object-centric learning for real-world videos by predicting temporal feature similarities (2023). https:\/\/arxiv.org\/abs\/2306.04829"},{"key":"32_CR42","unstructured":"Zhang, C., Gupta, A., Zisserman, A.: Is an object-centric video representation beneficial for transfer? (2022). https:\/\/arxiv.org\/abs\/2207.10075"},{"key":"32_CR43","unstructured":"Zhu, Y., Joshi, A., Stone, P., Zhu, Y.: VIOLA: Imitation learning for vision-based manipulation with object proposal priors (2023). https:\/\/arxiv.org\/abs\/2210.11339"}],"container-title":["Communications in Computer and Information Science","Robotics, Computer Vision and Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-00986-9_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T23:31:30Z","timestamp":1759275090000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-00986-9_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,1]]},"ISBN":["9783032009852","9783032009869"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-00986-9_32","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2025,10,1]]},"assertion":[{"value":"1 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ROBOVIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Robotics, Computer Vision and Intelligent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Porto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 February 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 February 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"robovis2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/robovis.scitevents.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}