{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T22:18:57Z","timestamp":1769033937930,"version":"3.49.0"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/cbmi66578.2025.11339329","type":"proceedings-article","created":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T20:38:56Z","timestamp":1768941536000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["Text-Oriented Image Query Representation for Zero-Shot Composed Image Retrieval"],"prefix":"10.1109","author":[{"given":"Pavan Kartheek","family":"Rachabathuni","sequence":"first","affiliation":[{"name":"University of Florence - MICC,Florence,Italy"}]},{"given":"Andrea","family":"Ciamarra","sequence":"additional","affiliation":[{"name":"CNIT Florence,Florence,Italy"}]},{"given":"Roberto","family":"Caldelli","sequence":"additional","affiliation":[{"name":"CNIT Florence and Universitas Mercatorum,Rome,Italy"}]},{"given":"Marco","family":"Bertini","sequence":"additional","affiliation":[{"name":"University of Florence - MICC,Florence,Italy"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Content-based image retrieval and the semantic gap in the deep learning era","volume-title":"CoRR","volume":"abs\/2011.06490","author":"Barz","year":"2020"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00970"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2072298.2072054"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3126686.3126763"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d18-1281"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01650"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475485"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413622"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548401"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2025.3593539"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681649"},{"key":"ref12","article-title":"Vision-by-language for training-free compositional image retrieval","volume-title":"Proc. of the International Conference on Learning Representations (ICLR)","author":"Karthik"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01850"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657740"},{"issue":"10","key":"ref15","first-page":"1251","article-title":"Content based image retrieval","volume":"3","author":"Jain","year":"2015","journal-title":"Int. J. Adv. Eng. Glob. Technol"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2072298.2072432"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/2393347.2393379"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2393347.2396335"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123403"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52688.2022.02080"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475659"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611817"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657823"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475483"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548126"},{"key":"ref26","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. of the International Conference on Machine Learning","author":"Radford"},{"key":"ref27","article-title":"Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"Proc. of the International Conference on Machine Learning","author":"Li"},{"key":"ref28","article-title":"Sentence-level prompts benefit composed image retrieval","volume-title":"Proc. of the International Conference on Learning Representations","author":"Bai"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3617597"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00565"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20044-1_32"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01256"},{"key":"ref33","first-page":"21","article-title":"MagicLens: Self-supervised image retrieval with open-ended instructions","volume":"235","author":"Zhang","journal-title":"Proc. of the International Conference on Machine Learning"},{"key":"ref34","article-title":"Zero-shot composed text-image retrieval","volume-title":"Proc. of the British Machine Vision Conference (BMVC)","author":"Liu","year":"2023"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00213"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01115"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01407"},{"issue":"4","key":"ref38","first-page":"2991","article-title":"Data roaming and quality assessment for composed image retrieval","volume-title":"Proc. of the AAAI Conference on Artificial Intelligence","volume":"38","author":"Levy"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3463799"},{"key":"ref40","volume-title":"Compodiff: Versatile composed image retrieval with latent diffusion","author":"Gu","year":"2024"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00660"}],"event":{"name":"2025 International Conference on Content-Based Multimedia Indexing (CBMI)","location":"Dublin, Ireland","start":{"date-parts":[[2025,10,22]]},"end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 International Conference on Content-Based Multimedia Indexing (CBMI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11339229\/11339242\/11339329.pdf?arnumber=11339329","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T07:31:21Z","timestamp":1768980681000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11339329\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/cbmi66578.2025.11339329","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}