{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T10:21:30Z","timestamp":1777890090484,"version":"3.51.4"},"reference-count":90,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.00656","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"01-13","source":"Crossref","is-referenced-by-count":0,"title":["Weakly-Supervised Learning of Dense Functional Correspondences"],"prefix":"10.1109","author":[{"given":"Stefan","family":"Stojanov","sequence":"first","affiliation":[{"name":"Stanford University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Linan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Stanford University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunzhi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Stanford University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel L. K.","family":"Yamins","sequence":"additional","affiliation":[{"name":"Stanford University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiajun","family":"Wu","sequence":"additional","affiliation":[{"name":"Stanford University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"3","author":"Amir","year":"2021","journal-title":"Deep vit features as dense visual descriptors"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00891"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref4","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"International conference on machine learning","author":"Chen","year":"2020"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00108"},{"key":"ref6","article-title":"Blender Online Community","year":"2018","journal-title":"Blender - a 3D modelling and rendering package. Blender Foundation, Stichting Blender Foundation"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"ref8","first-page":"5","author":"Deitke","year":"2024","journal-title":"Molmo and pixmo: Open weights and open data for state-of-the-art multimodal models"},{"key":"ref9","first-page":"5","article-title":"Objaverse-xl: A universe of 10 ~\\mathrm{m}+3 ~\\mathrm{d} objects","volume":"36","author":"Deitke","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00182"},{"key":"ref11","first-page":"3","author":"Devlin","year":"2018","journal-title":"Bert: Pre-training of deep bidirectional transformers for language understanding"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3349832"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802523"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460902"},{"key":"ref15","first-page":"5","author":"Dubey","year":"2024","journal-title":"The llama 3 herd of models"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01146"},{"key":"ref17","first-page":"373","article-title":"Dense object nets: Learning dense visual object descriptors by and for robotic manipulation","volume-title":"Conference on Robot Learning","author":"Peter","year":"2018"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.4324\/9780203767764"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00728"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341672"},{"key":"ref21","article-title":"HDRI Haven","volume-title":"Hdri haven","year":"2024"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801982"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801993"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_40"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.179"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00615"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611587"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72940-9_13"},{"key":"ref30","first-page":"5","article-title":"Segment anything in high quality","volume":"36","author":"Ke","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1111\/1467-8624.00228"},{"key":"ref32","first-page":"6","author":"Kingma","year":"2017","journal-title":"Adam: A method for stochastic optimization"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73226-3_8"},{"key":"ref35","first-page":"3","author":"Kuang","year":"2024","journal-title":"Ram: Retrieval-based affordance transfer for generalizable zero-shot robotic manipulation"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/2516971.2516975"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01548"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1006\/jmla.1997.2533"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.52202\/075280-1762"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01051"},{"key":"ref41","first-page":"2","author":"Li","year":"2024","journal-title":"Learning precise affordances from egocentric videos for robotic manipulation"},{"key":"ref42","first-page":"5","volume":"3","author":"Lin","year":"2023","journal-title":"Sphinx: The joint mixing of weights, tasks, and visual embeddings for multi-modal large language models"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/iccv48922.2021.00593"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00229"},{"key":"ref45","first-page":"5","article-title":"Scalable 3d captioning with pretrained models","volume":"36","author":"Luo","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73254-6_27"},{"key":"ref47","first-page":"6","volume":"2","author":"Min","year":"2019","journal-title":"Spair-71k: A large-scale benchmark for semantic correspondence"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00299"},{"key":"ref49","first-page":"15401557","article-title":"Same object, different grasps: Data and semantic knowledge for task-oriented grasping","volume-title":"Conference on robot learning","author":"Murali","year":"2021"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139369"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00878"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206484"},{"key":"ref53","first-page":"4","author":"AI","year":"2024","journal-title":"Gpt-4 technical report"},{"issue":"2","key":"ref54","first-page":"3","volume":"1","author":"Oquab","year":"2023","journal-title":"Dinov2: Learning robust visual features without supervision"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610923"},{"key":"ref56","first-page":"3","article-title":"Miles: Making imitation learning easy with self-supervision","volume-title":"Proceedings of the Conference on Robot Learning (CoRL)","author":"Papagiannis","year":"2024"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00754"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196971"},{"key":"ref59","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International conference on machine learning","author":"Radford","year":"2021"},{"issue":"140","key":"ref60","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"Journal of Machine Learning Research"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0913"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00881"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00688"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0068"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.460"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560894"},{"key":"ref71","first-page":"3","author":"Touvron","year":"2023","journal-title":"Llama: Open and efficient foundation language models"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00408"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.060"},{"key":"ref74","first-page":"6","author":"van den Oord","year":"2019","journal-title":"Representation learning with contrastive predictive coding"},{"key":"ref75","first-page":"5","article-title":"Knowledge transfer from vision foundation models for efficient training of small task-specific models","volume-title":"Forty-first International Conference on Machine Learning","author":"Vemulapalli","year":"2024"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01293"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00275"},{"issue":"2","key":"ref78","first-page":"3","volume":"1","author":"Wang","year":"2023","journal-title":"Cogvlm: Visual expert for pretrained language models"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1016\/j.cogdev.2009.10.003"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00084"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3062560"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01001"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00883"},{"key":"ref85","first-page":"3","author":"Yuan","year":"2024","journal-title":"Robopoint: A vision-language model for spatial affordance prediction for robotics"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01100"},{"key":"ref87","first-page":"7","volume":"1","author":"Zhang","year":"2023","journal-title":"A tale of two features: Stable diffusion complements dino for zero-shot semantic correspondence"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01840"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298903"},{"key":"ref90","first-page":"3","author":"Zhu","year":"2024","journal-title":"Visionbased manipulation from single human video with openworld object graphs"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11444211.pdf?arnumber=11444211","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:29:26Z","timestamp":1777613366000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11444211\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":90,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.00656","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}