{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T05:49:05Z","timestamp":1777873745279,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3737435","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:04:26Z","timestamp":1754255066000},"page":"5270-5277","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["CURE: A dataset for Clinical Understanding &amp; Retrieval Evaluation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-0105-7274","authenticated-orcid":false,"given":"Nadia","family":"Athar Sheikh","sequence":"first","affiliation":[{"name":"Clinia, Montreal, QC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0509-9025","authenticated-orcid":false,"given":"Daniel","family":"Buades Marcos","sequence":"additional","affiliation":[{"name":"Clinia, Montreal, QC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8376-0633","authenticated-orcid":false,"given":"Anne-Laure","family":"Jousse","sequence":"additional","affiliation":[{"name":"Clinia, Montreal, QC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2630-8167","authenticated-orcid":false,"given":"Akintunde","family":"Oladipo","sequence":"additional","affiliation":[{"name":"Clinia, Montreal, QC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7877-8829","authenticated-orcid":false,"given":"Olivier","family":"Rousseau","sequence":"additional","affiliation":[{"name":"Clinia, Montreal, QC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0661-7189","authenticated-orcid":false,"given":"Jimmy","family":"Lin","sequence":"additional","affiliation":[{"name":"University of Waterloo, Waterloo, ON, Canada"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"AI@Meta. 2024. Llama 3 Model Card. (2024). https:\/\/github.com\/meta-llama\/llama3\/blob\/main\/MODEL_CARD.md"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.3238\/arztebl.2008.0037"},{"key":"e_1_3_2_2_3_1","unstructured":"Abhinand Balachandran. 2024. MedEmbed: Medical-Focused Embedding Models. https:\/\/github.com\/abhinand5\/MedEmbed"},{"key":"e_1_3_2_2_4_1","first-page":"65","article-title":"Managing clinical knowledge for health care improvement","volume":"1","author":"Balas E A","year":"2000","unstructured":"E A Balas and S A Boren. 2000. Managing clinical knowledge for health care improvement. Yearb. Med. Inform. 1 (2000), 65-70.","journal-title":"Yearb. Med. Inform."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-30671-1_58"},{"key":"e_1_3_2_2_6_1","volume-title":"Specter: Document-level representation learning using citation-informed transformers. arXiv preprint arXiv:2004.07180","author":"Cohan Arman","year":"2020","unstructured":"Arman Cohan, Sergey Feldman, Iz Beltagy, Doug Downey, and Daniel S Weld. 2020. Specter: Document-level representation learning using citation-informed transformers. arXiv preprint arXiv:2004.07180 (2020)."},{"key":"e_1_3_2_2_7_1","unstructured":"Cohere. 2023. Cohere Embed v3. https:\/\/cohere.com\/blog\/introducing-embed-v3"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2024.100968"},{"key":"e_1_3_2_2_9_1","unstructured":"Albert Q. Jiang Alexandre Sablayrolles Arthur Mensch Chris Bamford Devendra Singh Chaplot Diego de las Casas Florian Bressand Gianna Lengyel Guillaume Lample Lucile Saulnier L\u00e9lio Renard Lavaud Marie-Anne Lachaux Pierre Stock Teven Le Scao Thibaut Lavril Thomas Wang Timoth\u00e9e Lacroix and William El Sayed. 2023. Mistral 7B. arxiv: 2310.06825 [cs.CL] https:\/\/arxiv.org\/abs\/2310.06825"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1161\/strokeaha.121.036141"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-023-02068-4"},{"key":"e_1_3_2_2_12_1","volume-title":"NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models. arXiv preprint arXiv:2405.17428","author":"Lee Chankyu","year":"2024","unstructured":"Chankyu Lee, Rajarshi Roy, Mengyao Xu, Jonathan Raiman, Mohammad Shoeybi, Bryan Catanzaro, and Wei Ping. 2024b. NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models. arXiv preprint arXiv:2405.17428 (2024)."},{"key":"e_1_3_2_2_13_1","volume-title":"Gustavo Hernandez Abrego, Weiqiang Shi, Nithi Gupta, Aditya Kusupati, Prateek Jain, Siddhartha Reddy Jonnalagadda, Ming-Wei Chang, and Iftekhar Naim.","author":"Lee Jinhyuk","year":"2024","unstructured":"Jinhyuk Lee, Zhuyun Dai, Xiaoqi Ren, Blair Chen, Daniel Cer, Jeremy R. Cole, Kai Hui, Michael Boratko, Rajvi Kapadia, Wen Ding, Yi Luan, Sai Meher Karthik Duddu, Gustavo Hernandez Abrego, Weiqiang Shi, Nithi Gupta, Aditya Kusupati, Prateek Jain, Siddhartha Reddy Jonnalagadda, Ming-Wei Chang, and Iftekhar Naim. 2024a. Gecko: Versatile Text Embeddings Distilled from Large Language Models. arxiv: 2403.20327 [cs.CL] https:\/\/arxiv.org\/abs\/2403.20327"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1093\/database\/baq036"},{"key":"e_1_3_2_2_15_1","unstructured":"Xing Han L\u00f9. 2024. BM25S: Orders of magnitude faster lexical search via eager sparse scoring. arxiv: 2407.03618 [cs.IR] https:\/\/arxiv.org\/abs\/2407.03618"},{"key":"e_1_3_2_2_16_1","unstructured":"Microsoft. 2024. Microsoft Translator - microsoft.com. https:\/\/www.microsoft.com\/en-us\/translator\/."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2210.07316"},{"key":"e_1_3_2_2_18_1","unstructured":"National Library of Medicine. 2003. PMC Open Access Subset [Internet]. https:\/\/pmc.ncbi.nlm.nih.gov\/tools\/openftlist\/."},{"key":"e_1_3_2_2_19_1","unstructured":"OpenAI. 2024a. ChatGPT-4 Turbo. https:\/\/openai.com"},{"key":"e_1_3_2_2_20_1","unstructured":"OpenAI. 2024b. OpenAI Embeddings. https:\/\/platform.openai.com\/docs\/guides\/embeddings"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","unstructured":"Jack W. Rae Sebastian Borgeaud Trevor Cai Katie Millican Jordan Hoffmann Francis Song John Aslanides Sarah Henderson Roman Ring Susannah Young Eliza Rutherford Tom Hennigan Jacob Menick Albin Cassirer Richard Powell George van den Driessche Lisa Anne Hendricks Maribeth Rauh Po-Sen Huang Amelia Glaese Johannes Welbl Sumanth Dathathri Saffron Huang Jonathan Uesato John Mellor Irina Higgins Antonia Creswell Nat McAleese Amy Wu Erich Elsen Siddhant Jayakumar Elena Buchatskaya David Budden Esme Sutherland Karen Simonyan Michela Paganini Laurent Sifre Lena Martens Xiang Lorraine Li Adhiguna Kuncoro Aida Nematzadeh Elena Gribovskaya Domenic Donato Angeliki Lazaridou Arthur Mensch Jean-Baptiste Lespiau Maria Tsimpoukelli Nikolai Grigorev Doug Fritz Thibault Sottiaux Mantas Pajarskas Toby Pohlen Zhitao Gong Daniel Toyama Cyprien de Masson d'Autume Yujia Li Tayfun Terzi Vladimir Mikulik Igor Babuschkin Aidan Clark Diego de Las Casas Aurelia Guy Chris Jones James Bradbury Matthew Johnson Blake Hechtman Laura Weidinger Iason Gabriel William Isaac Ed Lockhart Simon Osindero Laura Rimell Chris Dyer Oriol Vinyals Kareem Ayoub Jeff Stanway Lorrayne Bennett Demis Hassabis Koray Kavukcuoglu and Geoffrey Irving. 2021. Scaling Language Models: Methods Analysis &; Insights from Training Gopher. doi:10.48550\/ARXIV.2112.11446","DOI":"10.48550\/ARXIV.2112.11446"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.1057"},{"key":"e_1_3_2_2_24_1","unstructured":"Reverso. 2024. Reverso Corporate | The best corporate AI-based translator - corporate-translation.reverso.com. https:\/\/www.corporate-translation.reverso.com\/."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2021.103865"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/s0146-0005(97)80013-4"},{"key":"e_1_3_2_2_27_1","unstructured":"DeepL SE. 2024. DeepL Translate: The world's most accurate translator - pdx.www.deepl.com. https:\/\/pdx.www.deepl.com\/en\/translator."},{"key":"e_1_3_2_2_28_1","volume-title":"Lo\u00efc Barrault.","author":"Chung Mariano Coria Meglioli Yu-An","year":"2023","unstructured":"Yu-An Chung Mariano Coria Meglioli David Dale Ning Dong Mark Duppenthaler Paul-Ambroise Duquenne Brian Ellis Hady Elsahar Justin Haaheim John Hoffman Min-Jae Hwang Hirofumi Inaguma Christopher Klaiber Ilia Kulikov Pengwei Li Daniel Licht Jean Maillard Ruslan Mavlyutov Alice Rakotoarison Kaushik Ram Sadagopan Abinesh Ramakrishnan Tuan Tran Guillaume Wenzek Yilin Yang Ethan Ye Ivan Evtimov Pierre Fernandez Cynthia Gao Prangthip Hansanti Elahe Kalbassi Amanda Kallet Artyom Kozhevnikov Gabriel Mejia Robin San Roman Christophe Touret Corinne Wong Carleigh Wood Bokai Yu Pierre Andrews Can Balioglu Peng-Jen Chen Marta R. Costa-juss\u00e0 Maha Elbayad Hongyu Gong Francisco Guzm\u00e1n Kevin Heffernan Somya Jain Justine Kao Ann Lee Xutai Ma Alex Mourachko Benjamin Peloquin Juan Pino Sravya Popuri Christophe Ropers Safiyyah Saleem Holger Schwenk Anna Sun Paden Tomasello Changhan Wang Jeff Wang Skyler Wang Mary Williamson Seamless Communication, Lo\u00efc Barrault. 2023. Seamless: Multilingual Expressive and Streaming Speech Translation. ArXiv."},{"key":"e_1_3_2_2_29_1","volume-title":"Michael G\u00fcnther, Bo Wang, Markus Krimmel, Feng Wang, Georgios Mastrapas, Andreas Koukounas, Andreas Koukounas, Nan Wang, and Han Xiao.","author":"Sturua Saba","year":"2024","unstructured":"Saba Sturua, Isabelle Mohr, Mohammad Kalim Akram, Michael G\u00fcnther, Bo Wang, Markus Krimmel, Feng Wang, Georgios Mastrapas, Andreas Koukounas, Andreas Koukounas, Nan Wang, and Han Xiao. 2024. jina-embeddings-v3: Multilingual Embeddings With Task LoRA. arxiv: 2409.10173 [cs.CL] https:\/\/arxiv.org\/abs\/2409.10173"},{"key":"e_1_3_2_2_30_1","volume-title":"Beir: A heterogenous benchmark for zero-shot evaluation of information retrieval models. arXiv preprint arXiv:2104.08663","author":"Thakur Nandan","year":"2021","unstructured":"Nandan Thakur, Nils Reimers, Andreas R\u00fcckl\u00e9, Abhishek Srivastava, and Iryna Gurevych. 2021. Beir: A heterogenous benchmark for zero-shot evaluation of information retrieval models. arXiv preprint arXiv:2104.08663 (2021)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","unstructured":"Shivani Upadhyay Ehsan Kamalloo and Jimmy Lin. 2024. LLMs Can Patch Up Missing Relevance Judgments in Evaluation. doi:10.48550\/ARXIV.2405.04727","DOI":"10.48550\/ARXIV.2405.04727"},{"key":"e_1_3_2_2_32_1","unstructured":"Daniel Vila-Suero and Francisco Aranda. 2023. Argilla - Open-source framework for data-centric NLP. https:\/\/github.com\/argilla-io\/argilla."},{"key":"e_1_3_2_2_33_1","volume-title":"Madeleine van Zuylen, Arman Cohan, and Hannaneh Hajishirzi.","author":"Wadden David","year":"2020","unstructured":"David Wadden, Shanchuan Lin, Kyle Lo, Lucy Lu Wang, Madeleine van Zuylen, Arman Cohan, and Hannaneh Hajishirzi. 2020. Fact or fiction: Verifying scientific claims. arXiv preprint arXiv:2004.14974 (2020)."},{"key":"e_1_3_2_2_34_1","unstructured":"Liang Wang Nan Yang Xiaolong Huang Linjun Yang Rangan Majumder and Furu Wei. 2024. Multilingual E5 Text Embeddings: A Technical Report. arXiv preprint arXiv:2402.05672 (2024)."},{"key":"e_1_3_2_2_35_1","unstructured":"Shitao Xiao Zheng Liu Peitian Zhang and Niklas Muennighoff. 2023. C-Pack: Packaged Resources To Advance General Chinese Embedding. arxiv: 2309.07597 [cs.CL]"},{"key":"e_1_3_2_2_36_1","unstructured":"An Yang Baosong Yang Binyuan Hui Bo Zheng Bowen Yu Chang Zhou Chengpeng Li Chengyuan Li Dayiheng Liu Fei Huang et al. 2024. Qwen2 technical report. arXiv preprint arXiv:2407.10671 (2024)."},{"key":"e_1_3_2_2_37_1","unstructured":"Puxuan Yu Luke Merrick Gaurav Nuti and Daniel Campos. 2024. Arctic-Embed 2.0: Multilingual Retrieval Without Compromise. arxiv: 2412.04506 [cs.CL] https:\/\/arxiv.org\/abs\/2412.04506"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00595"},{"key":"e_1_3_2_2_39_1","unstructured":"Xin Zhang Yanzhao Zhang Dingkun Long Wen Xie Ziqi Dai Jialong Tang Huan Lin Baosong Yang Pengjun Xie Fei Huang Meishan Zhang Wenjie Li and Min Zhang. 2024. mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval. arxiv: 2407.19669 [cs.CL] https:\/\/arxiv.org\/abs\/2407.19669"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3737435","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T18:18:44Z","timestamp":1777573124000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3737435"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":39,"alternative-id":["10.1145\/3711896.3737435","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3737435","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}