{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:06:12Z","timestamp":1775815572287,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":76,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Youth Innovation Promotion Association CAS","award":["2023111"],"award-info":[{"award-number":["2023111"]}]},{"name":"Beijing Natural Science Foundation","award":["4222029"],"award-info":[{"award-number":["4222029"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276248, 62376275,62377044,62076234"],"award-info":[{"award-number":["62276248, 62376275,62377044,62076234"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFA1008704"],"award-info":[{"award-number":["2023YFA1008704"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3671882","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:55:12Z","timestamp":1724561712000},"page":"526-537","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":27,"title":["Neural Retrievers are Biased Towards LLM-Generated Content"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-7549-0860","authenticated-orcid":false,"given":"Sunhao","family":"Dai","sequence":"first","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2453-9138","authenticated-orcid":false,"given":"Yuqi","family":"Zhou","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1161-8546","authenticated-orcid":false,"given":"Liang","family":"Pang","sequence":"additional","affiliation":[{"name":"CAS Key Laboratory of AI Safety Institute of Computing Technology Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0481-0246","authenticated-orcid":false,"given":"Weihao","family":"Liu","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5493-5779","authenticated-orcid":false,"given":"Xiaolin","family":"Hu","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6739-621X","authenticated-orcid":false,"given":"Yong","family":"Liu","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7397-5632","authenticated-orcid":false,"given":"Xiao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8795-8953","authenticated-orcid":false,"given":"Gang","family":"Wang","sequence":"additional","affiliation":[{"name":"Noah's Ark Lab, Huawei, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7170-111X","authenticated-orcid":false,"given":"Jun","family":"Xu","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1002\/widm.1345"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2023.08.001"},{"key":"e_1_3_2_2_3_1","volume-title":"Large language models (LLM) and ChatGPT: what will the impact on nuclear medicine be? European journal of nuclear medicine and molecular imaging","author":"Alberts Ian L","year":"2023","unstructured":"Ian L Alberts, Lorenzo Mercolli, Thomas Pyka, George Prenosil, Kuangyu Shi, Axel Rominger, and Ali Afshar-Oromieh. 2023. Large language models (LLM) and ChatGPT: what will the impact on nuclear medicine be? European journal of nuclear medicine and molecular imaging, Vol. 50, 6 (2023), 1549--1552."},{"key":"e_1_3_2_2_4_1","volume-title":"Hossein Babaei, Daniel LeJeune, Ali Siahkoohi, and Richard G Baraniuk.","author":"Alemohammad Sina","year":"2023","unstructured":"Sina Alemohammad, Josue Casco-Rodriguez, Lorenzo Luzi, Ahmed Imtiaz Humayun, Hossein Babaei, Daniel LeJeune, Ali Siahkoohi, and Richard G Baraniuk. 2023. Self-consuming generative models go mad. arXiv preprint arXiv:2307.01850 (2023)."},{"key":"e_1_3_2_2_5_1","volume-title":"Online searches to evaluate misinformation can increase its perceived veracity. Nature","author":"Aslett Kevin","year":"2023","unstructured":"Kevin Aslett, Zeve Sanderson, William Godel, Nathaniel Persily, Jonathan Nagler, and Joshua A Tucker. 2023. Online searches to evaluate misinformation can increase its perceived veracity. Nature (2023), 1--9."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/860435.860505"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Yejin Bang Samuel Cahyawijaya Nayeon Lee Wenliang Dai Dan Su Bryan Wilie Holy Lovenia Ziwei Ji Tiezheng Yu Willy Chung et al. 2023. A multitask multilingual multimodal evaluation of chatgpt on reasoning hallucination and interactivity. arXiv preprint arXiv:2302.04023 (2023).","DOI":"10.18653\/v1\/2023.ijcnlp-main.45"},{"key":"e_1_3_2_2_8_1","volume-title":"Ya-Qin Zhang, Lan Xue, Shai Shalev-Shwartz, Gillian Hadfield, et al.","author":"Bengio Yoshua","year":"2023","unstructured":"Yoshua Bengio, Geoffrey Hinton, Andrew Yao, Dawn Song, Pieter Abbeel, Yuval Noah Harari, Ya-Qin Zhang, Lan Xue, Shai Shalev-Shwartz, Gillian Hadfield, et al. 2023. Managing ai risks in an era of rapid progress. arXiv preprint arXiv:2310.17688 (2023)."},{"key":"e_1_3_2_2_9_1","unstructured":"James Betker Gabriel Goh Li Jing Tim Brooks Jianfeng Wang Linjie Li Long Ouyang Juntang Zhuang Joyce Lee Yufei Guo Wesam Manassra Prafulla Dhariwal Casey Chu and Yunxin Jiao. 2023. Improving Image Generation with Better Captions. (2023)."},{"key":"e_1_3_2_2_10_1","volume-title":"Large Language Models Suffer From Their Own Output: An Analysis of the Self-Consuming Training Loop. arXiv preprint arXiv:2311.16822","author":"Briesch Martin","year":"2023","unstructured":"Martin Briesch, Dominik Sobania, and Franz Rothlauf. 2023. Large Language Models Suffer From Their Own Output: An Analysis of the Self-Consuming Training Loop. arXiv preprint arXiv:2311.16822 (2023)."},{"key":"e_1_3_2_2_11_1","volume-title":"Yuanzhi Li, Scott Lundberg, et al.","author":"Bubeck S\u00e9bastien","year":"2023","unstructured":"S\u00e9bastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, et al. 2023. Sparks of artificial general intelligence: Early experiments with gpt-4. arXiv preprint arXiv:2303.12712 (2023)."},{"key":"e_1_3_2_2_12_1","volume-title":"A comprehensive survey of ai-generated content (aigc): A history of generative ai from gan to chatgpt. arXiv preprint arXiv:2303.04226","author":"Cao Yihan","year":"2023","unstructured":"Yihan Cao, Siyu Li, Yixin Liu, Zhiling Yan, Yutong Dai, Philip S Yu, and Lichao Sun. 2023. A comprehensive survey of ai-generated content (aigc): A history of generative ai from gan to chatgpt. arXiv preprint arXiv:2303.04226 (2023)."},{"key":"e_1_3_2_2_13_1","volume-title":"Can LLM-Generated Misinformation Be Detected? arXiv preprint arXiv:2309.13788","author":"Chen Canyu","year":"2023","unstructured":"Canyu Chen and Kai Shu. 2023. Can LLM-Generated Misinformation Be Detected? arXiv preprint arXiv:2309.13788 (2023)."},{"key":"e_1_3_2_2_14_1","volume-title":"Discrete Prompt Optimization via Constrained Generation for Zero-shot Re-ranker. arXiv preprint arXiv:2305.13729","author":"Cho Sukmin","year":"2023","unstructured":"Sukmin Cho, Soyeong Jeong, Jeongyeon Seo, and Jong C Park. 2023. Discrete Prompt Optimization via Constrained Generation for Zero-shot Re-ranker. arXiv preprint arXiv:2305.13729 (2023)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604915.3610646"},{"key":"e_1_3_2_2_16_1","volume-title":"Promptagator: Few-shot dense retrieval from 8 examples. arXiv preprint arXiv:2209.11755","author":"Dai Zhuyun","year":"2022","unstructured":"Zhuyun Dai, Vincent Y Zhao, Ji Ma, Yi Luan, Jianmo Ni, Jing Lu, Anton Bakalov, Kelvin Guu, Keith B Hall, and Ming-Wei Chang. 2022. Promptagator: Few-shot dense retrieval from 8 examples. arXiv preprint arXiv:2209.11755 (2022)."},{"key":"e_1_3_2_2_17_1","volume-title":"Matthew Aitchison, Laurent Orseau, et al.","author":"Del\u00e9tang Gr\u00e9goire","year":"2023","unstructured":"Gr\u00e9goire Del\u00e9tang, Anian Ruoss, Paul-Ambroise Duquenne, Elliot Catt, Tim Genewein, Christopher Mattern, Jordi Grau-Moya, Li Kevin Wenliang, Matthew Aitchison, Laurent Orseau, et al. 2023. Language modeling is compression. arXiv preprint arXiv:2309.10668 (2023)."},{"key":"e_1_3_2_2_18_1","volume-title":"Toxicity in chatgpt: Analyzing persona-assigned language models. arXiv preprint arXiv:2304.05335","author":"Deshpande Ameet","year":"2023","unstructured":"Ameet Deshpande, Vishvak Murahari, Tanmay Rajpurohit, Ashwin Kalyan, and Karthik Narasimhan. 2023. Toxicity in chatgpt: Analyzing persona-assigned language models. arXiv preprint arXiv:2304.05335 (2023)."},{"key":"e_1_3_2_2_19_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 4171--4186","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 4171--4186."},{"key":"e_1_3_2_2_20_1","volume-title":"Recommender systems in the era of large language models (llms). arXiv preprint arXiv:2307.02046","author":"Fan Wenqi","year":"2023","unstructured":"Wenqi Fan, Zihuai Zhao, Jiatong Li, Yunqing Liu, Xiaowei Mei, Yiqi Wang, Jiliang Tang, and Qing Li. 2023. Recommender systems in the era of large language models (llms). arXiv preprint arXiv:2307.02046 (2023)."},{"key":"e_1_3_2_2_21_1","volume-title":"How Close is ChatGPT to Human Experts? Comparison Corpus, Evaluation, and Detection. arXiv preprint arXiv:2301.07597","author":"Guo Biyang","year":"2023","unstructured":"Biyang Guo, Xin Zhang, Ziyuan Wang, Minqi Jiang, Jinran Nie, Yuxuan Ding, Jianwei Yue, and Yupeng Wu. 2023. How Close is ChatGPT to Human Experts? Comparison Corpus, Evaluation, and Detection. arXiv preprint arXiv:2301.07597 (2023)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3486250"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2019.102067"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/1316689.1316740"},{"key":"e_1_3_2_2_25_1","volume-title":"Machine-Made Media: Monitoring the Mobilization of Machine-Generated Articles on Misinformation and Mainstream News Websites. arXiv preprint arXiv:2305.09820","author":"Hanley Hans WA","year":"2023","unstructured":"Hans WA Hanley and Zakir Durumeric. 2023. Machine-Made Media: Monitoring the Mobilization of Machine-Generated Articles on Misinformation and Mainstream News Websites. arXiv preprint arXiv:2305.09820 (2023)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462891"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1111\/1911-3846.12832"},{"key":"e_1_3_2_2_28_1","volume-title":"Unsupervised dense information retrieval with contrastive learning. arXiv preprint arXiv:2112.09118","author":"Izacard Gautier","year":"2021","unstructured":"Gautier Izacard, Mathilde Caron, Lucas Hosseini, Sebastian Riedel, Piotr Bojanowski, Armand Joulin, and Edouard Grave. 2021. Unsupervised dense information retrieval with contrastive learning. arXiv preprint arXiv:2112.09118 (2021)."},{"key":"e_1_3_2_2_29_1","volume-title":"Few-shot learning with retrieval augmented language models. arXiv preprint arXiv:2208.03299","author":"Izacard Gautier","year":"2022","unstructured":"Gautier Izacard, Patrick Lewis, Maria Lomeli, Lucas Hosseini, Fabio Petroni, Timo Schick, Jane Dwivedi-Yu, Armand Joulin, Sebastian Riedel, and Edouard Grave. 2022. Few-shot learning with retrieval augmented language models. arXiv preprint arXiv:2208.03299 (2022)."},{"key":"e_1_3_2_2_30_1","volume-title":"Disinformation Detection: An Evolving Challenge in the Age of LLMs. arXiv preprint arXiv:2309.15847","author":"Jiang Bohan","year":"2023","unstructured":"Bohan Jiang, Zhen Tan, Ayushi Nirmal, and Huan Liu. 2023. Disinformation Detection: An Evolving Challenge in the Age of LLMs. arXiv preprint arXiv:2309.15847 (2023)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1980.1102314"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_2_33_1","volume-title":"Learning to rank for information retrieval and natural language processing","author":"Hang Li.","unstructured":"Hang Li. 2022. Learning to rank for information retrieval and natural language processing. Springer Nature."},{"key":"e_1_3_2_2_34_1","volume-title":"Foundations and Trends\u00ae in Information Retrieval","volume":"3","author":"Tie-Yan","year":"2009","unstructured":"Tie-Yan Liu et al. 2009. Learning to rank for information retrieval. Foundations and Trends\u00ae in Information Retrieval, Vol. 3, 3 (2009), 225--331."},{"key":"e_1_3_2_2_35_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_2_36_1","volume-title":"An introduction to information retrieval","author":"Manning Christopher D","unstructured":"Christopher D Manning. 2009. An introduction to information retrieval. Cambridge university press."},{"key":"e_1_3_2_2_37_1","volume-title":"Document ranking with a pretrained sequence-to-sequence model. arXiv preprint arXiv:2003.06713","author":"Nogueira Rodrigo","year":"2020","unstructured":"Rodrigo Nogueira, Zhiying Jiang, and Jimmy Lin. 2020. Document ranking with a pretrained sequence-to-sequence model. arXiv preprint arXiv:2003.06713 (2020)."},{"key":"e_1_3_2_2_38_1","volume-title":"Roberto de Alencar Lotufo, and Rodrigo Nogueira","author":"Nunes Desnes","year":"2023","unstructured":"Desnes Nunes, Ricardo Primi, Ramon Pires, Roberto de Alencar Lotufo, and Rodrigo Nogueira. 2023. Evaluating GPT-3.5 and GPT-4 Models on Brazilian University Admission Exams. ArXiv, Vol. abs\/2303.17003 (2023)."},{"key":"e_1_3_2_2_39_1","volume-title":"On the Risk of Misinformation Pollution with Large Language Models. arXiv preprint arXiv:2305.13661","author":"Pan Yikang","year":"2023","unstructured":"Yikang Pan, Liangming Pan, Wenhu Chen, Preslav Nakov, Min-Yen Kan, and William Yang Wang. 2023. On the Risk of Misinformation Pollution with Large Language Models. arXiv preprint arXiv:2305.13661 (2023)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_2_42_1","volume-title":"Can AI-Generated Text be Reliably Detected? ArXiv","author":"Sadasivan Vinu Sankar","year":"2023","unstructured":"Vinu Sankar Sadasivan, Aounon Kumar, S. Balasubramanian, Wenxiao Wang, and Soheil Feizi. 2023. Can AI-Generated Text be Reliably Detected? ArXiv, Vol. abs\/2303.11156 (2023). https:\/\/api.semanticscholar.org\/CorpusID:257631570"},{"key":"e_1_3_2_2_43_1","volume-title":"Replug: Retrieval-augmented black-box language models. arXiv preprint arXiv:2301.12652","author":"Shi Weijia","year":"2023","unstructured":"Weijia Shi, Sewon Min, Michihiro Yasunaga, Minjoon Seo, Rich James, Mike Lewis, Luke Zettlemoyer, and Wen-tau Yih. 2023. Replug: Retrieval-augmented black-box language models. arXiv preprint arXiv:2301.12652 (2023)."},{"key":"e_1_3_2_2_44_1","volume-title":"Model dementia: Generated data makes models forget. arXiv e-prints","author":"Shumailov Ilia","year":"2023","unstructured":"Ilia Shumailov, Zakhar Shumaylov, Yiren Zhao, Yarin Gal, Nicolas Papernot, and Ross Anderson. 2023. Model dementia: Generated data makes models forget. arXiv e-prints (2023), arXiv--2305."},{"key":"e_1_3_2_2_45_1","first-page":"35","article-title":"Modern information retrieval: A brief overview","volume":"24","author":"Amit Singhal","year":"2001","unstructured":"Amit Singhal et al. 2001. Modern information retrieval: A brief overview. IEEE Data Eng. Bull., Vol. 24, 4 (2001), 35--43.","journal-title":"IEEE Data Eng. Bull."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1108\/eb026526"},{"key":"e_1_3_2_2_47_1","volume-title":"AI model GPT-3 (dis) informs us better than humans. arXiv preprint arXiv:2301.11924","author":"Spitale Giovanni","year":"2023","unstructured":"Giovanni Spitale, Nikola Biller-Andorno, and Federico Germani. 2023. AI model GPT-3 (dis) informs us better than humans. arXiv preprint arXiv:2301.11924 (2023)."},{"key":"e_1_3_2_2_48_1","volume-title":"Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track. 492--501","author":"Srinivasan Krishna","year":"2022","unstructured":"Krishna Srinivasan, Karthik Raman, Anupam Samanta, Lingrui Liao, Luca Bertelli, and Michael Bendersky. 2022. QUILL: Query Intent with Large Language Models using Retrieval Augmentation and Multi-stage Distillation. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track. 492--501."},{"key":"e_1_3_2_2_49_1","volume-title":"Jonibek Mansurov, Di Wang, and Preslav Nakov.","author":"Su Jinyan","year":"2023","unstructured":"Jinyan Su, Terry Yue Zhuo, Jonibek Mansurov, Di Wang, and Preslav Nakov. 2023. Fake News Detectors are Biased against Texts Generated by Large Language Models. arXiv preprint arxiv:2309.08674 (2023)."},{"key":"e_1_3_2_2_50_1","volume-title":"Is ChatGPT Good at Search? Investigating Large Language Models as Re-Ranking Agent. arXiv preprint arXiv:2304.09542","author":"Sun Weiwei","year":"2023","unstructured":"Weiwei Sun, Lingyong Yan, Xinyu Ma, Pengjie Ren, Dawei Yin, and Zhaochun Ren. 2023. Is ChatGPT Good at Search? Investigating Large Language Models as Re-Ranking Agent. arXiv preprint arXiv:2304.09542 (2023)."},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-short.30"},{"key":"e_1_3_2_2_52_1","volume-title":"BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track.","author":"Thakur Nandan","year":"2021","unstructured":"Nandan Thakur, Nils Reimers, Andreas R\u00fcckl\u00e9, Abhishek Srivastava, and Iryna Gurevych. 2021. BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track."},{"key":"e_1_3_2_2_53_1","volume-title":"Kabilan Elangovan, Laura Gutierrez, Ting Fang Tan, and Daniel Shu Wei Ting.","author":"Thirunavukarasu Arun James","year":"2023","unstructured":"Arun James Thirunavukarasu, Darren Shu Jeng Ting, Kabilan Elangovan, Laura Gutierrez, Ting Fang Tan, and Daniel Shu Wei Ting. 2023. Large language models in medicine. Nature medicine, Vol. 29, 8 (2023), 1930--1940."},{"key":"e_1_3_2_2_54_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_2_55_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_2_56_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_2_57_1","volume-title":"Madeleine van Zuylen, Arman Cohan, and Hannaneh Hajishirzi.","author":"Wadden David","year":"2020","unstructured":"David Wadden, Shanchuan Lin, Kyle Lo, Lucy Lu Wang, Madeleine van Zuylen, Arman Cohan, and Hannaneh Hajishirzi. 2020. Fact or fiction: Verifying scientific claims. arXiv preprint arXiv:2004.14974 (2020)."},{"key":"e_1_3_2_2_58_1","volume-title":"BERT has a mouth, and it must speak: BERT as a Markov random field language model. arXiv preprint arXiv:1902.04094","author":"Wang Alex","year":"2019","unstructured":"Alex Wang and Kyunghyun Cho. 2019. BERT has a mouth, and it must speak: BERT as a Markov random field language model. arXiv preprint arXiv:1902.04094 (2019)."},{"key":"e_1_3_2_2_59_1","volume-title":"Language models with transformers. arXiv preprint arXiv:1904.09408","author":"Wang Chenguang","year":"2019","unstructured":"Chenguang Wang, Mu Li, and Alexander J Smola. 2019. Language models with transformers. arXiv preprint arXiv:1904.09408 (2019)."},{"key":"e_1_3_2_2_60_1","volume-title":"Query2doc: Query Expansion with Large Language Models. arXiv preprint arXiv:2303.07678","author":"Wang Liang","year":"2023","unstructured":"Liang Wang, Nan Yang, and Furu Wei. 2023. Query2doc: Query Expansion with Large Language Models. arXiv preprint arXiv:2303.07678 (2023)."},{"key":"e_1_3_2_2_61_1","volume-title":"Security and privacy on generative data in aigc: A survey. arXiv preprint arXiv:2309.09435","author":"Wang Tao","year":"2023","unstructured":"Tao Wang, Yushu Zhang, Shuren Qi, Ruoyu Zhao, Zhihua Xia, and Jian Weng. 2023. Security and privacy on generative data in aigc: A survey. arXiv preprint arXiv:2309.09435 (2023)."},{"key":"e_1_3_2_2_62_1","first-page":"5776","article-title":"Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers","volume":"33","author":"Wang Wenhui","year":"2020","unstructured":"Wenhui Wang, Furu Wei, Li Dong, Hangbo Bao, Nan Yang, and Ming Zhou. 2020. Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers. Advances in Neural Information Processing Systems, Vol. 33 (2020), 5776--5788.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467105"},{"key":"e_1_3_2_2_64_1","unstructured":"Jason Wei Yi Tay Rishi Bommasani Colin Raffel Barret Zoph Sebastian Borgeaud Dani Yogatama Maarten Bosma Denny Zhou Donald Metzler et al. 2022. Emergent abilities of large language models. arXiv preprint arXiv:2206.07682 (2022)."},{"key":"e_1_3_2_2_65_1","volume-title":"Ai-generated content (aigc): A survey. arXiv preprint arXiv:2304.06632","author":"Wu Jiayang","year":"2023","unstructured":"Jiayang Wu, Wensheng Gan, Zefeng Chen, Shicheng Wan, and Hong Lin. 2023. Ai-generated content (aigc): A survey. arXiv preprint arXiv:2304.06632 (2023)."},{"key":"e_1_3_2_2_66_1","volume-title":"Bloomberggpt: A large language model for finance. arXiv preprint arXiv:2303.17564","author":"Wu Shijie","year":"2023","unstructured":"Shijie Wu, Ozan Irsoy, Steven Lu, Vadim Dabravolski, Mark Dredze, Sebastian Gehrmann, Prabhanjan Kambadur, David Rosenberg, and Gideon Mann. 2023. Bloomberggpt: A large language model for finance. arXiv preprint arXiv:2303.17564 (2023)."},{"key":"e_1_3_2_2_67_1","volume-title":"Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808","author":"Xiong Lee","year":"2020","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul Bennett, Junaid Ahmed, and Arnold Overwijk. 2020. Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808 (2020)."},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/1277741.1277809"},{"key":"e_1_3_2_2_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557388"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.365"},{"key":"e_1_3_2_2_71_1","volume-title":"Harnessing the power of llms in practice: A survey on chatgpt and beyond. arXiv preprint arXiv:2304.13712","author":"Yang Jingfeng","year":"2023","unstructured":"Jingfeng Yang, Hongye Jin, Ruixiang Tang, Xiaotian Han, Qizhang Feng, Haoming Jiang, Bing Yin, and Xia Hu. 2023. Harnessing the power of llms in practice: A survey on chatgpt and beyond. arXiv preprint arXiv:2304.13712 (2023)."},{"key":"e_1_3_2_2_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441667"},{"key":"e_1_3_2_2_73_1","volume-title":"Dense Text Retrieval based on Pretrained Language Models: A Survey. ACM Trans. Inf. Syst. (dec","author":"Zhao Wayne Xin","year":"2023","unstructured":"Wayne Xin Zhao, Jing Liu, Ruiyang Ren, and Ji-Rong Wen. 2023. Dense Text Retrieval based on Pretrained Language Models: A Survey. ACM Trans. Inf. Syst. (dec 2023)."},{"key":"e_1_3_2_2_74_1","unstructured":"Wayne Xin Zhao Kun Zhou Junyi Li Tianyi Tang Xiaolei Wang Yupeng Hou Yingqian Min Beichen Zhang Junjie Zhang Zican Dong et al. 2023. A survey of large language models. arXiv preprint arXiv:2303.18223 (2023)."},{"key":"e_1_3_2_2_75_1","volume-title":"Large language models for information retrieval: A survey. arXiv preprint arXiv:2308.07107","author":"Zhu Yutao","year":"2023","unstructured":"Yutao Zhu, Huaying Yuan, Shuting Wang, Jiongnan Liu, Wenhan Liu, Chenlong Deng, Zhicheng Dou, and Ji-Rong Wen. 2023. Large language models for information retrieval: A survey. arXiv preprint arXiv:2308.07107 (2023)."},{"key":"e_1_3_2_2_76_1","volume-title":"Exploring ai ethics of chatgpt: A diagnostic analysis. arXiv preprint arXiv:2301.12867","author":"Zhuo Terry Yue","year":"2023","unstructured":"Terry Yue Zhuo, Yujin Huang, Chunyang Chen, and Zhenchang Xing. 2023. Exploring ai ethics of chatgpt: A diagnostic analysis. arXiv preprint arXiv:2301.12867 (2023)."}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Barcelona Spain","acronym":"KDD '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671882","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3671882","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:04:15Z","timestamp":1750291455000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671882"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":76,"alternative-id":["10.1145\/3637528.3671882","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3671882","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}