{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T12:23:05Z","timestamp":1776082985185,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","funder":[{"name":"Swiss National Science Foundation &#x28;SNSF&#x29;","award":["215742"],"award-info":[{"award-number":["215742"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,7]]},"DOI":"10.1145\/3767695.3769502","type":"proceedings-article","created":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T17:14:58Z","timestamp":1764782098000},"page":"353-363","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Investigating LLM Variability in Personalized Conversational Information Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-2383-4557","authenticated-orcid":false,"given":"Simon","family":"Lupart","sequence":"first","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2571-842X","authenticated-orcid":false,"given":"Dani\u00ebl","family":"van Dijk","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5614-1749","authenticated-orcid":false,"given":"Eric","family":"Langezaal","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5881-760X","authenticated-orcid":false,"given":"Ian","family":"van Dort","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9447-4172","authenticated-orcid":false,"given":"Mohammad","family":"Aliannejadi","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]}],"member":"320","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Zahra Abbasiantaeb Simon Lupart and Mohammad Aliannejadi. 2024. Generating Multi-Aspect Queries for Conversational Search. arXiv:2403.19302 [cs.IR] https:\/\/arxiv.org\/abs\/2403.19302"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3730316"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657860"},{"key":"e_1_3_2_1_4_1","volume-title":"The Thirty-Third Text REtrieval Conference Proceedings (TREC 2024","author":"Aliannejadi Mohammad","year":"2024","unstructured":"Mohammad Aliannejadi, Zahra Abbasiantaeb, Simon Lupart, Shubham Chatterjee, Jeffrey Dalton, and Leif Azzopardi. 2024b. TREC iKAT 2024: The Interactive Knowledge Assistance Track Overview. In The Thirty-Third Text REtrieval Conference Proceedings (TREC 2024), Gaithersburg, MD, USA, November 15-18, 2024 (NIST Special Publication, Vol. 1329). National Institute of Standards and Technology (NIST). https:\/\/trec.nist.gov\/pubs\/trec33\/papers\/Overview_ikat.pdf"},{"key":"e_1_3_2_1_5_1","unstructured":"Berk Atil Alexa Chittams Liseng Fu Ferhan Ture Lixinyu Xu and Breck Baldwin. 2024. LLM Stability: A detailed analysis with some surprises. arXiv:2408.04667 [cs.CL] https:\/\/arxiv.org\/abs\/2408.04667"},{"key":"e_1_3_2_1_6_1","volume-title":"The Behavior Gap: Evaluating Zero-shot LLM Agents in Complex Task-Oriented Dialogs. arXiv preprint arXiv:2506.12266","author":"Baidya Avinash","year":"2025","unstructured":"Avinash Baidya, Kamalika Das, and Xiang Gao. 2025. The Behavior Gap: Evaluating Zero-shot LLM Agents in Complex Task-Oriented Dialogs. arXiv preprint arXiv:2506.12266 (2025)."},{"key":"e_1_3_2_1_7_1","volume-title":"Cohn","author":"Blackwell Robert E.","year":"2024","unstructured":"Robert E. Blackwell, Jon Barry, and Anthony G. Cohn. 2024. Towards Reproducible LLM Evaluation: Quantifying Uncertainty in LLM Benchmark Scores. arXiv:2410.03492 [cs.CL] https:\/\/arxiv.org\/abs\/2410.03492"},{"key":"e_1_3_2_1_8_1","first-page":"69022","article-title":"On the worst prompt performance of large language models","volume":"37","author":"Cao Bowen","year":"2024","unstructured":"Bowen Cao, Deng Cai, Zhisong Zhang, Yuexian Zou, and Wai Lam. 2024a. On the worst prompt performance of large language models. Advances in Neural Information Processing Systems, Vol. 37 (2024), 69022-69042.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_9_1","unstructured":"Bowen Cao Deng Cai Zhisong Zhang Yuexian Zou and Wai Lam. 2024b. On the Worst Prompt Performance of Large Language Models. arXiv:2406.10248 [cs.CL] https:\/\/arxiv.org\/abs\/2406.10248"},{"key":"e_1_3_2_1_10_1","unstructured":"Sherman Chann. 2023. Non-determinism in GPT-4 is caused by Sparse MoE. https:\/\/152334h.github.io\/blog\/non-determinism-in-gpt-4\/"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401206"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1605"},{"key":"e_1_3_2_1_13_1","volume-title":"What Did I Do Wrong? Quantifying LLMs' Sensitivity and Consistency to Prompt Engineering. arXiv preprint arXiv:2406.12334","author":"Errica Federico","year":"2024","unstructured":"Federico Errica, Giuseppe Siracusano, Davide Sanvito, and Roberto Bifulco. 2024. What Did I Do Wrong? Quantifying LLMs' Sensitivity and Consistency to Prompt Engineering. arXiv preprint arXiv:2406.12334 (2024)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401418"},{"key":"e_1_3_2_1_15_1","volume-title":"Neural Approaches to Conversational Information Retrieval. CoRR abs\/2201.05176","author":"Gao Jianfeng","year":"2022","unstructured":"Jianfeng Gao, Chenyan Xiong, Paul Bennett, and Nick Craswell. 2022. Neural Approaches to Conversational Information Retrieval. CoRR abs\/2201.05176 (2022). arXiv:2201.05176 https:\/\/arxiv.org\/abs\/2201.05176"},{"key":"e_1_3_2_1_16_1","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Alex Vaughan Amy Yang Angela Fan Anirudh Goyal Anthony Hartshorn Aobo Yang Archi Mitra Archie Sravankumar Artem Korenev Arthur Hinsvark Arun Rao Aston Zhang Aurelien Rodriguez Austen Gregerson Ava Spataru Baptiste Roziere Bethany Biron Binh Tang Bobbie Chern Charlotte Caucheteux Chaya Nayak Chloe Bi Chris Marra Chris McConnell et al. 2024. The Llama 3 Herd of Models. arXiv:2407.21783 [cs.AI] https:\/\/arxiv.org\/abs\/2407.21783"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3295776"},{"key":"e_1_3_2_1_18_1","unstructured":"Binyuan Hui Jian Yang Zeyu Cui Jiaxi Yang Dayiheng Liu Lei Zhang Tianyu Liu Jiajun Zhang Bowen Yu Keming Lu et al. 2024. Qwen2. 5-coder technical report. arXiv preprint arXiv:2409.12186 (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.443"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_21_1","volume-title":"Longlamp: A benchmark for personalized long-form text generation.","author":"Kumar Ishita","year":"2024","unstructured":"Ishita Kumar, Snigdha Viswanathan, Sushrita Yerra, Alireza Salemi, Ryan A Rossi, Franck Dernoncourt, Hanieh Deilamsalehy, Xiang Chen, Ruiyi Zhang, Shubham Agarwal, et al., 2024. Longlamp: A benchmark for personalized long-form text generation."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463238"},{"key":"e_1_3_2_1_23_1","volume-title":"Learned Sparse Retrieval with Multi-aspect LLM Query Generation for Conversational Search. arXiv preprint arXiv:2411.14739","author":"Lupart Simon","year":"2024","unstructured":"Simon Lupart, Zahra Abbasiantaeb, and Mohammad Aliannejadi. 2024. IRLab@iKAT24: Learned Sparse Retrieval with Multi-aspect LLM Query Generation for Conversational Search. arXiv preprint arXiv:2411.14739 (2024)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3729966"},{"key":"e_1_3_2_1_25_1","unstructured":"Kelong Mao Chenlong Deng Haonan Chen Fengran Mo Zheng Liu Tetsuya Sakai and Zhicheng Dou. 2024. ChatRetriever: Adapting Large Language Models for Generalized and Robust Conversational Dense Retrieval. arXiv:2404.13556 [cs.IR] https:\/\/arxiv.org\/abs\/2404.13556"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.86"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531961"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.543"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3730023"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.135"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679939"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3697010"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3536321"},{"key":"e_1_3_2_1_34_1","volume-title":"Quantifying generalization complexity for large language models. arXiv preprint arXiv:2410.01769","author":"Qi Zhenting","year":"2024","unstructured":"Zhenting Qi, Hongyin Luo, Xuliang Huang, Zhuokai Zhao, Yibo Jiang, Xiangjun Fan, Himabindu Lakkaraju, and James Glass. 2024. Quantifying generalization complexity for large language models. arXiv preprint arXiv:2410.01769 (2024)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401110"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020165.3020183"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.3390\/math12060929"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Alireza Salemi Julian Killingback and Hamed Zamani. 2025. ExPerT: Effective and Explainable Evaluation of Personalized Long-Form Text Generation.","DOI":"10.18653\/v1\/2025.findings-acl.900"},{"key":"e_1_3_2_1_39_1","volume-title":"Lamp: When large language models meet personalization.","author":"Salemi Alireza","year":"2023","unstructured":"Alireza Salemi, Sheshera Mysore, Michael Bendersky, and Hamed Zamani. 2023. Lamp: When large language models meet personalization."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.5555\/3600270.3601857"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","unstructured":"Yiming Wang Ziyang Zhang Hanwei Chen and Huayi Shen. 2024. Reasoning with Large Language Models on Graph Tasks: The Influence of Temperature. 630-634 pages. doi:10.1109\/ICCEA62105.2024.10603677","DOI":"10.1109\/ICCEA62105.2024.10603677"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-24755-2_7"},{"key":"e_1_3_2_1_43_1","unstructured":"Lee Xiong Chenyan Xiong Ye Li Kwok-Fung Tang Jialin Liu Paul Bennett Junaid Ahmed and Arnold Overwijk. 2020. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. arXiv:2007.00808 [cs.IR]"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.398"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462856"}],"event":{"name":"SIGIR-AP 2025:Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region","location":"Xi'an China","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 2025 Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region"],"original-title":[],"deposited":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T17:20:29Z","timestamp":1764782429000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3767695.3769502"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":45,"alternative-id":["10.1145\/3767695.3769502","10.1145\/3767695"],"URL":"https:\/\/doi.org\/10.1145\/3767695.3769502","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"2025-12-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}