{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T10:21:22Z","timestamp":1777890082634,"version":"3.51.4"},"reference-count":66,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.00869","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"9309-9319","source":"Crossref","is-referenced-by-count":0,"title":["GECO: Geometrically Consistent Embedding with Lightspeed Inference"],"prefix":"10.1109","author":[{"given":"Regine","family":"Hartwig","sequence":"first","affiliation":[{"name":"TU Munich Munich,Center for Machine Learning"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dominik","family":"Muhle","sequence":"additional","affiliation":[{"name":"TU Munich Munich,Center for Machine Learning"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Riccardo","family":"Marin","sequence":"additional","affiliation":[{"name":"TU Munich Munich,Center for Machine Learning"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel","family":"Cremers","sequence":"additional","affiliation":[{"name":"TU Munich Munich,Center for Machine Learning"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"issue":"3","key":"ref1","first-page":"4","article-title":"Deep vit features as dense visual descriptors","volume":"2","author":"Amir","year":"2021","journal-title":"arXiv preprint arXiv"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.14778"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref5","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"ICML","author":"Chen","year":"2020"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.254"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00595"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_16"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3218727"},{"key":"ref10","first-page":"28104","article-title":"Unsupervised part discovery from contrastive reconstruction","volume":"34","author":"Choudhury","year":"2021","journal-title":"NeurIPS"},{"key":"ref11","article-title":"Sinkhorn distances: Lightspeed computation of optimal transport","volume-title":"NeurIPS","author":"Cuturi","year":"2013"},{"key":"ref12","first-page":"2024","volume-title":"Tutorial on optimal transport in learning, control, and dynamical systems","author":"Cuturi","year":"2023"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00060"},{"key":"ref14","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume-title":"NeurIPS","volume":"34","author":"Dhariwal","year":"2021"},{"key":"ref15","article-title":"Do it yourself: Learning semantic correspondence from pseudo-labels","author":"D\u00fcnkel","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02059"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2026.3658965"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/WACV61041.2025.00658"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_30"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3497510"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00382"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2724510"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0363"},{"issue":"2","key":"ref26","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume":"1","author":"Hu","year":"2022","journal-title":"ICLR"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19781-9_16"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00096"},{"key":"ref29","article-title":"Openclip","author":"Ilharco","year":"2021","journal-title":"Zenodo"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01672"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555392"},{"key":"ref32","first-page":"124391","article-title":"Loco: Learning 3d location-consistent image features with a memory-efficient ranking loss","volume":"37","author":"Kloepfer","year":"2024","journal-title":"NeurIPS"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73220-1_5"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00742"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"ref36","article-title":"Diffusion hyperfeatures: Searching through time and space for semantic correspondence","volume-title":"NeurIPS","volume":"36","author":"Luo","year":"2024"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01846"},{"key":"ref38","article-title":"Spair-71k: A large-scale benchmark for semantic correspondence","author":"Min","journal-title":"arXiv preprint arXiv"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19769-7_17"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.306"},{"key":"ref41","volume-title":"Ngc catalog 2023 tutorial 21559","year":"2023"},{"key":"ref42","article-title":"Dinov2: Learning robust visual features without supervision","author":"Oquab","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2009.5459199"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1561\/2200000073"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01081"},{"key":"ref46","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"ICML","author":"Radford","year":"2021"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00020"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00881"},{"key":"ref51","first-page":"1363","article-title":"Emergent correspondence from image diffusion","volume":"36","author":"Tang","year":"2023","journal-title":"NeurIPS"},{"key":"ref52","article-title":"Unsupervised learning of object frames by dense equivariant image labelling","volume":"30","author":"Thewlis","year":"2017","journal-title":"NeurIPS"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52688.2022.00851"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01048"},{"key":"ref55","volume-title":"The caltech-ucsd birds-200\u20132011 dataset","author":"Wah","year":"2011"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00499"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01956"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02530"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555479"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01010"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02522"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02042"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72627-9_4"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00297"},{"key":"ref65","article-title":"A tale of two features: Stable diffusion complements dino for zero-shot semantic correspondence","volume-title":"NeurIPS","volume":"36","author":"Zhang","year":"2024"},{"key":"ref66","article-title":"ibot: Image bert pre-training with online tokenizer","author":"Zhou","year":"2021","journal-title":"arXiv preprint arXiv"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11443312.pdf?arnumber=11443312","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:28:18Z","timestamp":1777613298000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11443312\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":66,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.00869","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}