{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T06:15:08Z","timestamp":1775283308857,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,7,18]],"date-time":"2023-07-18T00:00:00Z","timestamp":1689638400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"NSERC"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,7,19]]},"DOI":"10.1145\/3539618.3591903","type":"proceedings-article","created":{"date-parts":[[2023,7,19]],"date-time":"2023-07-19T00:22:59Z","timestamp":1689726179000},"page":"2975-2984","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["AToMiC: An Image\/Text Retrieval Test Collection to Support Multimedia Content Creation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5469-312X","authenticated-orcid":false,"given":"Jheng-Hong","family":"Yang","sequence":"first","affiliation":[{"name":"University of Waterloo, Waterloo, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7754-6656","authenticated-orcid":false,"given":"Carlos","family":"Lassance","sequence":"additional","affiliation":[{"name":"Naver Labs Europe, Grenoble, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4853-105X","authenticated-orcid":false,"given":"Rafael","family":"Sampaio De Rezende","sequence":"additional","affiliation":[{"name":"Naver Labs Europe, Grenoble, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1366-0895","authenticated-orcid":false,"given":"Krishna","family":"Srinivasan","sequence":"additional","affiliation":[{"name":"Google Research, San Francisco, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0581-0251","authenticated-orcid":false,"given":"Miriam","family":"Redi","sequence":"additional","affiliation":[{"name":"Wikimedia Foundation, London, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2367-8837","authenticated-orcid":false,"given":"St\u00e9phane","family":"Clinchant","sequence":"additional","affiliation":[{"name":"Naver Labs Europe, Grenoble, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0661-7189","authenticated-orcid":false,"given":"Jimmy","family":"Lin","sequence":"additional","affiliation":[{"name":"University of Waterloo, Waterloo, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,7,18]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-008-0246-8"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2699668"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-99739-7_30"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.15294\/elt.v9i1.34614"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-99736-6_36"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_7_1","volume-title":"Minsuk Chang Chang, and Seong Joon Oh","author":"Chun Sanghyuk","year":"2022","unstructured":"Sanghyuk Chun, Wonjae Kim, Song Park, Minsuk Chang Chang, and Seong Joon Oh. 2022. ECCV Caption: Correcting False Negatives by Collecting Machine-and-Human-verified Image-Caption Associations for MS-COCO. In Proc. of ECCV. 1--19."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00831"},{"key":"e_1_3_2_1_9_1","volume-title":"Proc. of BMVC.","author":"Faghri Fartash","year":"2018","unstructured":"Fartash Faghri, David J. Fleet, Jamie Ryan Kiros, and Sanja Fidler. 2018. VSE++: Improving Visual-Semantic Embeddings with Hard Negatives. In Proc. of BMVC."},{"key":"e_1_3_2_1_10_1","volume-title":"Overview of the INEX 2007 ad hoc track. In Proc. of INEX. 1--23","author":"Fuhr Norbert","year":"2007","unstructured":"Norbert Fuhr, Jaap Kamps, Mounia Lalmas, Saadia Malik, and Andrew Trotman. 2007. Overview of the INEX 2007 ad hoc track. In Proc. of INEX. 1--23."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28231-2_12"},{"key":"e_1_3_2_1_12_1","volume-title":"FIRE Workshops. 63--66","author":"Ganguly Debasis","unstructured":"Debasis Ganguly, Iacer Calixto, and Gareth J.F. Jones. 2015. Overview of the Automated Story Illustration Task at FIRE 2015.. In FIRE Workshops. 63--66."},{"key":"e_1_3_2_1_13_1","first-page":"503","article-title":"Retrieve Fast, Rerank Smart: Cooperative and Joint Approaches for Improved Cross-Modal Retrieval","volume":"10","author":"Geigle Gregor","year":"2022","unstructured":"Gregor Geigle, Jonas Pfeiffer, Nils Reimers, Ivan Vuli\u0107, and Iryna Gurevych. 2022. Retrieve Fast, Rerank Smart: Cooperative and Joint Approaches for Improved Cross-Modal Retrieval. Trans. of ACL 10 (2022), 503--521.","journal-title":"Trans. of ACL"},{"key":"e_1_3_2_1_14_1","volume-title":"Davis","author":"Han Xintong","year":"2017","unstructured":"Xintong Han, Zuxuan Wu, Phoenix X. Huang, Xiao Zhang, Menglong Zhu, Yuan Li, Yang Zhao, and Larry S. Davis. 2017. Automatic Spatially-aware Fashion Concept Discovery. In Proc. of ICCV. 1463--1471."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3994"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Bogdan Ionescu Henning M\u00fcller Renaud P\u00e9teri Johannes R\u00fcckert Asma Ben Abacha Alba G. Seco de Herrera Christoph M. Friedrich Louise Bloch Raphael Br\u00fcngel Ahmad Idrissi-Yaghir Henning Sch\u00e4fer Serge Kozlovski Yashin Dicente Cid Vassili Kovalev Liviu-Daniel \u015etefan Mihai Gabriel Constantin Mihai Dogariu Adrian Popescu J\u00e9r\u00f4me Deshayes-Chossart Hugo Schindler Jon Chamberlain Antonio Campello and Adrian Clark. 2022. Overview of the ImageCLEF 2022: Multimedia Retrieval in Medical Social Media and Nature Applications. In Proc. of CLEF. 541--564.","DOI":"10.1007\/978-3-031-13643-6_31"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.308"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1469"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF02766607"},{"key":"e_1_3_2_1_21_1","volume-title":"Proc. of ICML.","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In Proc. of ICML."},{"key":"e_1_3_2_1_22_1","first-page":"9694","article-title":"Align Before Fuse: Vision and Language Representation Learning with Momentum Distillation","volume":"34","author":"Li Junnan","year":"2021","unstructured":"Junnan Li, Ramprasaath Selvaraju, Akhilesh Gotmare, Shafiq Joty, Caiming Xiong, and Steven Chu Hong Hoi. 2021. Align Before Fuse: Vision and Language Representation Learning with Momentum Distillation. Proc. of NeurIPS 34, 9694--9705.","journal-title":"Proc. of NeurIPS"},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. of ECCV. 740--755","author":"Lin Tsung-Yi","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C. Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In Proc. of ECCV. 740--755."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.542"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3524273.3532891"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00331"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1108\/00220410310506303"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Henning M\u00fcller Paul D. Clough Thomas Deselaers and Barbara Caputo (Eds.). 2010. ImageCLEF Experimental Evaluation in Visual Information Retrieval. Springer.","DOI":"10.1007\/978-3-642-15181-1"},{"key":"e_1_3_2_1_29_1","volume-title":"Proc. of CLEF.","author":"Petras Vivien","unstructured":"Vivien Petras and Paul D. Clough. 2011. Introduction to the CLEF 2011 Labs. In Proc. of CLEF."},{"key":"e_1_3_2_1_30_1","volume-title":"Proc. of CLEF.","author":"Popescu Adrian","year":"2010","unstructured":"Adrian Popescu, Theodora Tsikrika, and Jana Kludas. 2010. Overview of the Wikipedia Retrieval Task at ImageCLEF 2010. In Proc. of CLEF."},{"key":"e_1_3_2_1_31_1","volume-title":"Proc. of ICML. 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning Transferable Visual Models from Natural Language Supervision. In Proc. of ICML. 8748--8763."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1140\/epjds\/s13688-021-00312-8"},{"key":"e_1_3_2_1_33_1","volume-title":"Proc. of SIGIR. 2727--2737","author":"Rao Jun","year":"2022","unstructured":"Jun Rao, Fei Wang, Liang Ding, Shuhan Qi, Yibing Zhan, Weifeng Liu, and Dacheng Tao. 2022. Where Does the Performance Improvement Come From? A Reproducibility Concern about Image-Text Retrieval. In Proc. of SIGIR. 2727--2737."},{"key":"e_1_3_2_1_34_1","volume-title":"Proc. of NeurIPS: Datasets and Benchmarks Track.","author":"Schuhmann Christoph","year":"2022","unstructured":"Christoph Schuhmann, Romain Beaumont, Richard Vencu, Cade W. Gordon, Ross Wightman, Mehdi Cherti, Theo Coombes, Aarush Katta, Clayton Mullis, Mitchell Wortsman, Patrick Schramowski, Srivatsa R. Kundurthy, Katherine Crowson, Ludwig Schmidt, Robert Kaczmarczyk, and Jenia Jitsev. 2022. LAION-5B: An Open Large-scale Dataset for Training Next Generation Image-text Models. In Proc. of NeurIPS: Datasets and Benchmarks Track."},{"key":"e_1_3_2_1_35_1","volume-title":"LAION-400m: Open Dataset of CLIP-filtered 400 Million Image-text Pairs. arXiv:2111.02114","author":"Schuhmann Christoph","year":"2021","unstructured":"Christoph Schuhmann, Richard Vencu, Romain Beaumont, Robert Kaczmarczyk, Clayton Mullis, Aarush Katta, Theo Coombes, Jenia Jitsev, and Aran Komatsuzaki. 2021. LAION-400m: Open Dataset of CLIP-filtered 400 Million Image-text Pairs. arXiv:2111.02114 (2021)."},{"key":"e_1_3_2_1_36_1","volume-title":"A-OKVQA: A Benchmark for Visual Question Answering using World Knowledge. arXiv:2206.01718","author":"Schwenk Dustin","year":"2022","unstructured":"Dustin Schwenk, Apoorv Khandelwal, Christopher Clark, Kenneth Marino, and Roozbeh Mottaghi. 2022. A-OKVQA: A Benchmark for Visual Question Answering using World Knowledge. arXiv:2206.01718 (2022)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01519"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463257"},{"key":"e_1_3_2_1_39_1","volume-title":"Smith","author":"Stvilia Besiki","year":"2005","unstructured":"Besiki Stvilia, Michael B. Twidale, Les Gasser, and Linda C. Smith. 2005. Information Quality in a Community-based Encyclopedia. In Knowledge Management: Nurturing Culture, Innovation, and Technology. 101--113."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15181-1_9"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1272"},{"key":"e_1_3_2_1_42_1","volume-title":"Technical Report CNS-TR-2010-001","author":"Welinder Peter","unstructured":"Peter Welinder, Steve Branson, Takeshi Mita, Catherine Wah, Florian Schroff, Serge Belongie, and Pietro Perona. 2010. Caltech-UCSD Birds 200. Technical Report CNS-TR-2010-001. California Institute of Technology."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1177\/0093650212439062"},{"key":"e_1_3_2_1_44_1","volume-title":"Proc. of ICLR.","author":"Yao Lewei","year":"2022","unstructured":"Lewei Yao, Runhui Huang, Lu Hou, Guansong Lu, Minzhe Niu, Hang Xu, Xiaodan Liang, Zhenguo Li, Xin Jiang, and Chunjing Xu. 2022. FILIP: Fine-grained Interactive Language-Image Pre-Training. In Proc. of ICLR."},{"key":"e_1_3_2_1_45_1","first-page":"67","article-title":"From Image Descriptions to Visual Denotations: New Similarity Metrics for Semantic Inference over Event","volume":"2","author":"Young Peter","year":"2014","unstructured":"Peter Young, Alice Lai, Micah Hodosh, and Julia Hockenmaier. 2014. From Image Descriptions to Visual Denotations: New Similarity Metrics for Semantic Inference over Event Descriptions. Trans. of ACL 2 (2014), 67--78.","journal-title":"Descriptions. Trans. of ACL"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01759"},{"key":"e_1_3_2_1_47_1","volume-title":"Making a MIRACL: Multilingual Information Retrieval Across a Continuum of Languages. arXiv:2210.09984","author":"Zhang Xinyu","year":"2022","unstructured":"Xinyu Zhang, Nandan Thakur, Odunayo Ogundepo, Ehsan Kamalloo, David Alfonso-Hermelo, Xiaoguang Li, Qun Liu, Mehdi Rezagholizadeh, and Jimmy Lin. 2022. Making a MIRACL: Multilingual Information Retrieval Across a Continuum of Languages. arXiv:2210.09984 (2022)."},{"key":"e_1_3_2_1_48_1","volume-title":"Langlotz","author":"Zhang Yuhao","year":"2022","unstructured":"Yuhao Zhang, Hang Jiang, Yasuhide Miura, Christopher D. Manning, and Curtis P. Langlotz. 2022. Contrastive Learning of Medical Visual Representations from Paired Images and Text. In Proc. of MLHC. 2--25."}],"event":{"name":"SIGIR '23: The 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Taipei Taiwan","acronym":"SIGIR '23","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3591903","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539618.3591903","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:38:07Z","timestamp":1750178287000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3591903"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,18]]},"references-count":48,"alternative-id":["10.1145\/3539618.3591903","10.1145\/3539618"],"URL":"https:\/\/doi.org\/10.1145\/3539618.3591903","relation":{},"subject":[],"published":{"date-parts":[[2023,7,18]]},"assertion":[{"value":"2023-07-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}