{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:40:06Z","timestamp":1755866406428,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,13]]},"DOI":"10.1145\/3726302.3731969","type":"proceedings-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T01:38:52Z","timestamp":1752457132000},"page":"4385-4389","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Data-efficient Meta-models for Evaluation of Context-based Questions and Answers in LLMs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-7829-1249","authenticated-orcid":false,"given":"Julia","family":"Belikova","sequence":"first","affiliation":[{"name":"Sber AI Lab, Moscow, Russian Federation and Moscow Institute of Physics and Technology, Dolgoprudny, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0504-5940","authenticated-orcid":false,"given":"Konstantin","family":"Polev","sequence":"additional","affiliation":[{"name":"Sber AI Lab, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1678-1243","authenticated-orcid":false,"given":"Rauf","family":"Parchiev","sequence":"additional","affiliation":[{"name":"Sber AI Lab, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3199-479X","authenticated-orcid":false,"given":"Dmitry","family":"Simakov","sequence":"additional","affiliation":[{"name":"Sber AI Lab, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-naacl.181"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the 31st International Conference on Computational Linguistics: Industry Track. 398-409","author":"Belyi Masha","year":"2025","unstructured":"Masha Belyi, Robert Friel, Shuai Shao, and Atindriyo Sanyal. 2025. Luna: A Lightweight Evaluation Model to Catch Language Model Hallucinations with High Accuracy and Low Cost. In Proceedings of the 31st International Conference on Computational Linguistics: Industry Track. 398-409."},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track. 1207-1217","author":"Chen Yuemin","year":"2024","unstructured":"Yuemin Chen, FeifanWu, JingweiWang, Hao Qian, Ziqi Liu, Zhiqiang Zhang, Jun Zhou, and Meng Wang. 2024. Knowledge-augmented Financial Market Analysis and Report Generation. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track. 1207-1217."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.84"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.276"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.eacl-demo.16"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00330"},{"key":"e_1_3_2_1_9_1","volume-title":"Ragbench: Explainable benchmark for retrieval-augmented generation systems. arXiv preprint arXiv:2407.11005","author":"Friel Robert","year":"2024","unstructured":"Robert Friel, Masha Belyi, and Atindriyo Sanyal. 2024. Ragbench: Explainable benchmark for retrieval-augmented generation systems. arXiv preprint arXiv:2407.11005 (2024)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.390"},{"key":"e_1_3_2_1_11_1","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Alex Vaughan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_12_1","volume-title":"Tabpfn: A transformer that solves small tabular classification problems in a second. arXiv preprint arXiv:2207.01848","author":"Hollmann Noah","year":"2022","unstructured":"Noah Hollmann, Samuel M\u00fcller, Katharina Eggensperger, and Frank Hutter. 2022. Tabpfn: A transformer that solves small tabular classification problems in a second. arXiv preprint arXiv:2207.01848 (2022)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-024-08328-6"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2024.3519464"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"e_1_3_2_1_16_1","volume-title":"Semantic Uncertainty: Linguistic Invariances for Uncertainty Estimation in Natural Language Generation. In The Eleventh International Conference on Learning Representations, ICLR 2023","author":"Kuhn Lorenz","year":"2023","unstructured":"Lorenz Kuhn, Yarin Gal, and Sebastian Farquhar. 2023. Semantic Uncertainty: Linguistic Invariances for Uncertainty Estimation in Natural Language Generation. In The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023. OpenReview.net. https:\/\/openreview.net\/pdf?id=VD-AYtP0dve"},{"key":"e_1_3_2_1_17_1","unstructured":"Aixin Liu Bei Feng Bing Xue BingxuanWang BochaoWu Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan et al. 2024. Deepseek-v3 technical report. arXiv preprint arXiv:2412.19437 (2024)."},{"key":"e_1_3_2_1_18_1","volume-title":"Expertqa: Expert-curated questions and attributed answers. arXiv preprint arXiv:2309.07852","author":"Malaviya Chaitanya","year":"2023","unstructured":"Chaitanya Malaviya, Subin Lee, Sihao Chen, Elizabeth Sieber, Mark Yatskar, and Dan Roth. 2023. Expertqa: Expert-curated questions and attributed answers. arXiv preprint arXiv:2309.07852 (2023)."},{"key":"e_1_3_2_1_19_1","volume-title":"Gales","author":"Manakul Potsawee","year":"2023","unstructured":"Potsawee Manakul, Adian Liusie, and Mark J. F. Gales. 2023. SelfCheckGPT: Zero- Resource Black-Box Hallucination Detection for Generative Large Language Models. arXiv:2303.08896 [cs.CL] https:\/\/arxiv.org\/abs\/2303.08896"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-naacl.33"},{"key":"e_1_3_2_1_21_1","volume-title":"Question answering over electronic devices: A new benchmark dataset and a multi-task learning based QA framework. arXiv preprint arXiv:2109.05897","author":"Nandy Abhilash","year":"2021","unstructured":"Abhilash Nandy, Soumya Sharma, Shubham Maddhashiya, Kapil Sachdeva, Pawan Goyal, and Niloy Ganguly. 2021. Question answering over electronic devices: A new benchmark dataset and a multi-task learning based QA framework. arXiv preprint arXiv:2109.05897 (2021)."},{"key":"e_1_3_2_1_22_1","volume-title":"Ragtruth: A hallucination corpus for developing trustworthy retrieval-augmented language models. arXiv preprint arXiv:2401.00396","author":"Niu Cheng","year":"2023","unstructured":"Cheng Niu, Yuanhao Wu, Juno Zhu, Siliang Xu, Kashun Shum, Randy Zhong, Juntong Song, and Tong Zhang. 2023. Ragtruth: A hallucination corpus for developing trustworthy retrieval-augmented language models. arXiv preprint arXiv:2401.00396 (2023)."},{"key":"e_1_3_2_1_23_1","unstructured":"Hadas Orgad Michael Toker Zorik Gekhman Roi Reichart Idan Szpektor Hadas Kotek and Yonatan Belinkov. 2024. LLMs Know More Than They Show: On the Intrinsic Representation of LLM Hallucinations. arXiv:2410.02707 [cs.CL] https:\/\/arxiv.org\/abs\/2410.02707"},{"key":"e_1_3_2_1_24_1","volume-title":"Anna Veronika Dorogush, and Andrey Gulin","author":"Prokhorenkova Liudmila","year":"2018","unstructured":"Liudmila Prokhorenkova, Gleb Gusev, Aleksandr Vorobev, Anna Veronika Dorogush, and Andrey Gulin. 2018. CatBoost: unbiased boosting with categorical features. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.20"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.685"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3677052.3698671"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","first-page":"4401","DOI":"10.18653\/v1\/2024.findings-acl.260","article-title":"Do Androids Know They're Only Dreaming of Electric Sheep?","volume":"2024","author":"Sky Wang","year":"2024","unstructured":"CH-Wang Sky, Benjamin Van Durme, Jason Eisner, and Chris Kedzie. 2024. Do Androids Know They're Only Dreaming of Electric Sheep?. In Findings of the Association for Computational Linguistics ACL 2024. 4401-4420.","journal-title":"Findings of the Association for Computational Linguistics ACL"},{"key":"e_1_3_2_1_29_1","volume-title":"Cassidy Hardin, Surya Bhupatiraju, L\u00e9onard Hussenot, Thomas Mesnard, Bobak Shahriari, Alexandre Ram\u00e9, et al.","author":"Team Gemma","year":"2024","unstructured":"Gemma Team, Morgane Riviere, Shreya Pathak, Pier Giuseppe Sessa, Cassidy Hardin, Surya Bhupatiraju, L\u00e9onard Hussenot, Thomas Mesnard, Bobak Shahriari, Alexandre Ram\u00e9, et al. 2024. Gemma 2: Improving open language models at a practical size. arXiv preprint arXiv:2408.00118 (2024)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00390"},{"key":"e_1_3_2_1_31_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_32_1","volume-title":"arXiv preprint arXiv:2412.15115","author":"Yang An","year":"2024","unstructured":"An Yang, Baosong Yang, Beichen Zhang, Binyuan Hui, Bo Zheng, Bowen Yu, Chengyuan Li, Dayiheng Liu, Fei Huang, Haoran Wei, Huan Lin, Jian Yang, Jianhong Tu, Jianwei Zhang, Jianxin Yang, Jiaxi Yang, Jingren Zhou, Junyang Lin, Kai Dang, Keming Lu, Keqin Bao, Kexin Yang, Le Yu, Mei Li, Mingfeng Xue, Pei Zhang, Qin Zhu, Rui Men, Runji Lin, Tianhao Li, Tingyu Xia, Xingzhang Ren, Xuancheng Ren, Yang Fan, Yang Su, Yichang Zhang, Yu Wan, Yuqiong Liu, Zeyu Cui, Zhenru Zhang, and Zihan Qiu. 2024. Qwen2.5 Technical Report. arXiv preprint arXiv:2412.15115 (2024)."},{"key":"e_1_3_2_1_33_1","unstructured":"Yue Zhang Yafu Li Leyang Cui Deng Cai Lemao Liu Tingchen Fu Xinting Huang Enbo Zhao Yu Zhang Yulong Chen et al. 2023. Siren's song in the AI ocean: a survey on hallucination in large language models. arXiv preprint arXiv:2309.01219 (2023)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-industry.23"},{"key":"e_1_3_2_1_35_1","first-page":"46595","article-title":"Judging llm-as-a-judge with mt-bench and chatbot arena","volume":"36","author":"Zheng Lianmin","year":"2023","unstructured":"Lianmin Zheng, Wei-Lin Chiang, Ying Sheng, Siyuan Zhuang, Zhanghao Wu, Yonghao Zhuang, Zi Lin, Zhuohan Li, Dacheng Li, Eric Xing, et al. 2023. Judging llm-as-a-judge with mt-bench and chatbot arena. Advances in Neural Information Processing Systems 36 (2023), 46595-46623.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"SIGIR '25: The 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Padua Italy","acronym":"SIGIR '25"},"container-title":["Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3731969","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:12:45Z","timestamp":1755864765000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3726302.3731969"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":35,"alternative-id":["10.1145\/3726302.3731969","10.1145\/3726302"],"URL":"https:\/\/doi.org\/10.1145\/3726302.3731969","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}