{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T11:48:52Z","timestamp":1774352932221,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T00:00:00Z","timestamp":1741564800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,10]]},"DOI":"10.1145\/3701551.3703514","type":"proceedings-article","created":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T12:33:36Z","timestamp":1740573216000},"page":"336-344","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Writing Style Matters: An Examination of Bias and Fairness in Information Retrieval Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1326-8159","authenticated-orcid":false,"given":"Hongliu","family":"Cao","sequence":"first","affiliation":[{"name":"Amadeus, Nice, France"}]}],"member":"320","published-online":{"date-parts":[[2025,3,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Nicholas Meade, and Siva Reddy.","author":"Adlakha Vaibhav","year":"2023","unstructured":"Vaibhav Adlakha, Parishad BehnamGhader, Xing Han Lu, Nicholas Meade, and Siva Reddy. 2023. Evaluating correctness and faithfulness of instruction-following models for question answering. arXiv preprint arXiv:2307.16877 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00667"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00471"},{"key":"e_1_3_2_1_4_1","volume-title":"LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders. arXiv preprint arXiv:2404.05961","author":"BehnamGhader Parishad","year":"2024","unstructured":"Parishad BehnamGhader, Vaibhav Adlakha, Marius Mosbach, Dzmitry Bahdanau, Nicolas Chapados, and Siva Reddy. 2024. LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders. arXiv preprint arXiv:2404.05961 (2024)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"e_1_3_2_1_6_1","volume-title":"Recent advances in text embedding: A Comprehensive Review of Top-Performing Methods on the MTEB Benchmark. arXiv preprint arXiv:2406.01607","author":"Cao Hongliu","year":"2024","unstructured":"Hongliu Cao. 2024. Recent advances in text embedding: A Comprehensive Review of Top-Performing Methods on the MTEB Benchmark. arXiv preprint arXiv:2406.01607 (2024)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10191995"},{"key":"e_1_3_2_1_8_1","volume-title":"Humans or llms as the judge? a study on judgement biases. arXiv preprint arXiv:2402.10669","author":"Chen Guiming Hardy","year":"2024","unstructured":"Guiming Hardy Chen, Shunian Chen, Ziche Liu, Feng Jiang, and Benyou Wang. 2024a. Humans or llms as the judge? a study on judgement biases. arXiv preprint arXiv:2402.10669 (2024)."},{"key":"e_1_3_2_1_9_1","volume-title":"Spiral of Silences: How is Large Language Model Killing Information Retrieval?--A Case Study on Open Domain Question Answering. arXiv preprint arXiv:2404.10496","author":"Chen Xiaoyang","year":"2024","unstructured":"Xiaoyang Chen, Ben He, Hongyu Lin, Xianpei Han, Tianshu Wang, Boxi Cao, Le Sun, and Yingfei Sun. 2024b. Spiral of Silences: How is Large Language Model Killing Information Retrieval?--A Case Study on Open Domain Question Answering. arXiv preprint arXiv:2404.10496 (2024)."},{"key":"e_1_3_2_1_10_1","first-page":"1","article-title":"Scaling instruction-finetuned language models","volume":"25","author":"Chung Hyung Won","year":"2024","unstructured":"Hyung Won Chung, Le Hou, Shayne Longpre, Barret Zoph, Yi Tay, William Fedus, Yunxuan Li, Xuezhi Wang, Mostafa Dehghani, Siddhartha Brahma, et al. 2024. Scaling instruction-finetuned language models. Journal of Machine Learning Research, Vol. 25, 70 (2024), 1--53.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_11_1","volume-title":"Unsupervised cross-lingual representation learning at scale. arXiv preprint arXiv:1911.02116","author":"Conneau Alexis","year":"2019","unstructured":"Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzm\u00e1n, Edouard Grave, Myle Ott, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Unsupervised cross-lingual representation learning at scale. arXiv preprint arXiv:1911.02116 (2019)."},{"key":"e_1_3_2_1_12_1","volume-title":"Unifying Bias and Unfairness in Information Retrieval: A Survey of Challenges and Opportunities with Large Language Models. arXiv preprint arXiv:2404.11457","author":"Dai Sunhao","year":"2024","unstructured":"Sunhao Dai, Chen Xu, Shicheng Xu, Liang Pang, Zhenhua Dong, and Jun Xu. 2024. Unifying Bias and Unfairness in Information Retrieval: A Survey of Challenges and Opportunities with Large Language Models. arXiv preprint arXiv:2404.11457 (2024)."},{"key":"e_1_3_2_1_13_1","volume-title":"Llms may dominate information access: Neural retrievers are biased towards llm-generated texts. arXiv preprint arXiv:2310.20501","author":"Dai Sunhao","year":"2023","unstructured":"Sunhao Dai, Yuqi Zhou, Liang Pang, Weihao Liu, Xiaolin Hu, Yong Liu, Xiao Zhang, and Jun Xu. 2023. Llms may dominate information access: Neural retrievers are biased towards llm-generated texts. arXiv preprint arXiv:2310.20501 (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_15_1","volume-title":"Ragas: Automated evaluation of retrieval augmented generation. arXiv preprint arXiv:2309.15217","author":"Es Shahul","year":"2023","unstructured":"Shahul Es, Jithin James, Luis Espinosa-Anke, and Steven Schockaert. 2023. Ragas: Automated evaluation of retrieval augmented generation. arXiv preprint arXiv:2309.15217 (2023)."},{"key":"e_1_3_2_1_16_1","volume-title":"Information retrieval: recent advances and beyond","author":"Hambarde Kailash A","year":"2023","unstructured":"Kailash A Hambarde and Hugo Proenca. 2023. Information retrieval: recent advances and beyond. IEEE Access (2023)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-56060-6_24"},{"key":"e_1_3_2_1_18_1","volume-title":"A Survey on Retrieval-Augmented Text Generation for Large Language Models. arXiv preprint arXiv:2404.10981","author":"Huang Yizheng","year":"2024","unstructured":"Yizheng Huang and Jimmy Huang. 2024. A Survey on Retrieval-Augmented Text Generation for Large Language Models. arXiv preprint arXiv:2404.10981 (2024)."},{"key":"e_1_3_2_1_19_1","volume-title":"GPT-4o: The Cutting-Edge Advancement in Multimodal LLM. Authorea Preprints","author":"Islam Raisa","year":"2024","unstructured":"Raisa Islam and Owana Marzia Moushi. 2024. GPT-4o: The Cutting-Edge Advancement in Multimodal LLM. Authorea Preprints (2024)."},{"key":"e_1_3_2_1_20_1","volume-title":"Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al.","author":"Jiang Albert Q","year":"2023","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_1_21_1","first-page":"30233","article-title":"Matryoshka representation learning","volume":"35","author":"Kusupati Aditya","year":"2022","unstructured":"Aditya Kusupati, Gantavya Bhatt, Aniket Rege, Matthew Wallingford, Aditya Sinha, Vivek Ramanujan, William Howard-Snyder, Kaifeng Chen, Sham Kakade, Prateek Jain, et al. 2022. Matryoshka representation learning. Advances in Neural Information Processing Systems, Vol. 35 (2022), 30233--30249.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_22_1","volume-title":"Gecko: Versatile Text Embeddings Distilled from Large Language Models. arXiv preprint arXiv:2403.20327","author":"Lee Jinhyuk","year":"2024","unstructured":"Jinhyuk Lee, Zhuyun Dai, Xiaoqi Ren, Blair Chen, Daniel Cer, Jeremy R Cole, Kai Hui, Michael Boratko, Rajvi Kapadia, Wen Ding, et al. 2024. Gecko: Versatile Text Embeddings Distilled from Large Language Models. arXiv preprint arXiv:2403.20327 (2024)."},{"key":"e_1_3_2_1_23_1","volume-title":"Latent retrieval for weakly supervised open domain question answering. arXiv preprint arXiv:1906.00300","author":"Lee Kenton","year":"2019","unstructured":"Kenton Lee, Ming-Wei Chang, and Kristina Toutanova. 2019. Latent retrieval for weakly supervised open domain question answering. arXiv preprint arXiv:1906.00300 (2019)."},{"key":"e_1_3_2_1_24_1","volume-title":"Towards general text embeddings with multi-stage contrastive learning. arXiv preprint arXiv:2308.03281","author":"Li Zehan","year":"2023","unstructured":"Zehan Li, Xin Zhang, Yanzhao Zhang, Dingkun Long, Pengjun Xie, and Meishan Zhang. 2023. Towards general text embeddings with multi-stage contrastive learning. arXiv preprint arXiv:2308.03281 (2023)."},{"key":"e_1_3_2_1_25_1","volume-title":"G-eval: Nlg evaluation using gpt-4 with better human alignment. arXiv preprint arXiv:2303.16634","author":"Liu Yang","year":"2023","unstructured":"Yang Liu, Dan Iter, Yichong Xu, Shuohang Wang, Ruochen Xu, and Chenguang Zhu. 2023a. G-eval: Nlg evaluation using gpt-4 with better human alignment. arXiv preprint arXiv:2303.16634 (2023)."},{"key":"e_1_3_2_1_26_1","volume-title":"Nafise Sadat Moosavi, and Chenghua Lin","author":"Liu Yiqi","year":"2023","unstructured":"Yiqi Liu, Nafise Sadat Moosavi, and Chenghua Lin. 2023b. Llms as narcissistic evaluators: When ego inflates evaluation scores. arXiv preprint arXiv:2311.09766 (2023)."},{"key":"e_1_3_2_1_27_1","volume-title":"Introduction to information retrieval","author":"Manning Christopher D","unstructured":"Christopher D Manning, Prabhakar Raghavan, and Hinrich Sch\u00fctze. 2008. Introduction to information retrieval. Cambridge university press."},{"key":"e_1_3_2_1_28_1","volume-title":"Caiming Xiong, Yingbo Zhou, and Semih Yavuz.","author":"Rui","year":"2024","unstructured":"Rui Meng*, Ye Liu*, Shafiq Rayhan Joty, Caiming Xiong, Yingbo Zhou, and Semih Yavuz. 2024. SFR-Embedding-2: Advanced Text Embedding with Multi-stage Training. https:\/\/huggingface.co\/Salesforce\/SFR-Embedding-2_R"},{"key":"e_1_3_2_1_29_1","volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)."},{"key":"e_1_3_2_1_30_1","volume-title":"MTEB: Massive text embedding benchmark. arXiv preprint arXiv:2210.07316","author":"Muennighoff Niklas","year":"2022","unstructured":"Niklas Muennighoff, Nouamane Tazi, Lo\"ic Magne, and Nils Reimers. 2022. MTEB: Massive text embedding benchmark. arXiv preprint arXiv:2210.07316 (2022)."},{"key":"e_1_3_2_1_31_1","volume-title":"Deep contextualized word representations. arXiv preprint arXiv:1802.05365","author":"Neumann MPM","year":"2018","unstructured":"MPM Neumann, M Iyyer, M Gardner, C Clark, K Lee, and L Zettlemoyer. 2018. Deep contextualized word representations. arXiv preprint arXiv:1802.05365 (2018)."},{"key":"e_1_3_2_1_32_1","volume-title":"Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084","author":"Reimers Nils","year":"2019","unstructured":"Nils Reimers and Iryna Gurevych. 2019. Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084 (2019)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_1_34_1","volume-title":"Blinded by Generated Contexts: How Language Models Merge Generated and Retrieved Contexts for Open-Domain QA? arXiv preprint arXiv:2401.11911","author":"Tan Hexiang","year":"2024","unstructured":"Hexiang Tan, Fei Sun, Wanli Yang, Yuanzhuo Wang, Qi Cao, and Xueqi Cheng. 2024. Blinded by Generated Contexts: How Language Models Merge Generated and Retrieved Contexts for Open-Domain QA? arXiv preprint arXiv:2401.11911 (2024)."},{"key":"e_1_3_2_1_35_1","volume-title":"Found in the middle: Permutation self-consistency improves listwise ranking in large language models. arXiv preprint arXiv:2310.07712","author":"Tang Raphael","year":"2023","unstructured":"Raphael Tang, Xinyu Zhang, Xueguang Ma, Jimmy Lin, and Ferhan Ture. 2023. Found in the middle: Permutation self-consistency improves listwise ranking in large language models. arXiv preprint arXiv:2310.07712 (2023)."},{"key":"e_1_3_2_1_36_1","volume-title":"Stanford alpaca: an instruction-following llama model","author":"Taori Rohan","year":"2023","unstructured":"Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, and Tatsunori B Hashimoto. 2023. Stanford alpaca: an instruction-following llama model (2023). URL https:\/\/github. com\/tatsu-lab\/stanford_alpaca, Vol. 1, 9 (2023)."},{"key":"e_1_3_2_1_37_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_38_1","volume-title":"Improving text embeddings with large language models. arXiv preprint arXiv:2401.00368","author":"Wang Liang","year":"2023","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Linjun Yang, Rangan Majumder, and Furu Wei. 2023. Improving text embeddings with large language models. arXiv preprint arXiv:2401.00368 (2023)."},{"key":"e_1_3_2_1_39_1","volume-title":"Multilingual e5 text embeddings: A technical report. arXiv preprint arXiv:2402.05672","author":"Wang Liang","year":"2024","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Linjun Yang, Rangan Majumder, and Furu Wei. 2024. Multilingual e5 text embeddings: A technical report. arXiv preprint arXiv:2402.05672 (2024)."},{"key":"e_1_3_2_1_40_1","volume-title":"How faithful are RAG models? Quantifying the tug-of-war between RAG and LLMs' internal prior. arXiv preprint arXiv:2404.10198","author":"Wu Kevin","year":"2024","unstructured":"Kevin Wu, Eric Wu, and James Zou. 2024. How faithful are RAG models? Quantifying the tug-of-war between RAG and LLMs' internal prior. arXiv preprint arXiv:2404.10198 (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"C-pack: Packaged resources to advance general chinese embedding. arXiv preprint arXiv:2309.07597","author":"Xiao Shitao","year":"2023","unstructured":"Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighof. 2023. C-pack: Packaged resources to advance general chinese embedding. arXiv preprint arXiv:2309.07597 (2023)."},{"key":"e_1_3_2_1_42_1","unstructured":"An Yang Baosong Yang Binyuan Hui Bo Zheng Bowen Yu Chang Zhou Chengpeng Li Chengyuan Li Dayiheng Liu Fei Huang et al. 2024. Qwen2 technical report. arXiv preprint arXiv:2407.10671 (2024)."},{"key":"e_1_3_2_1_43_1","volume-title":"HotpotQA: A dataset for diverse, explainable multi-hop question answering. arXiv preprint arXiv:1809.09600","author":"Yang Zhilin","year":"2018","unstructured":"Zhilin Yang, Peng Qi, Saizheng Zhang, Yoshua Bengio, William W Cohen, Ruslan Salakhutdinov, and Christopher D Manning. 2018. HotpotQA: A dataset for diverse, explainable multi-hop question answering. arXiv preprint arXiv:1809.09600 (2018)."}],"event":{"name":"WSDM '25: The Eighteenth ACM International Conference on Web Search and Data Mining","location":"Hannover Germany","acronym":"WSDM '25","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the Eighteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701551.3703514","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701551.3703514","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T09:14:45Z","timestamp":1755767685000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701551.3703514"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,10]]},"references-count":43,"alternative-id":["10.1145\/3701551.3703514","10.1145\/3701551"],"URL":"https:\/\/doi.org\/10.1145\/3701551.3703514","relation":{},"subject":[],"published":{"date-parts":[[2025,3,10]]},"assertion":[{"value":"2025-03-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}