{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T07:59:04Z","timestamp":1776931144118,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,15]]},"DOI":"10.1145\/3768292.3770354","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T07:24:26Z","timestamp":1763105066000},"page":"141-149","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Query Generation Pipeline with Enhanced Answerability Assessment for Financial Information Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7858-8136","authenticated-orcid":false,"given":"Hyunkyu","family":"Kim","sequence":"first","affiliation":[{"name":"Kakaobank, Seongnam-si, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7370-3178","authenticated-orcid":false,"given":"Yeeun","family":"Yoo","sequence":"additional","affiliation":[{"name":"Kakaobank, Seongnam-si, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4805-8786","authenticated-orcid":false,"given":"Youngjun","family":"Kwak","sequence":"additional","affiliation":[{"name":"Kakaobank, Seongnam-si, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,14]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Lucas Bandarkar Davis Liang Benjamin Muller Mikel Artetxe Satya\u00a0Narayan Shukla Donald Husa Naman Goyal Abhinandan Krishnan Luke Zettlemoyer and Madian Khabsa. 2023. The belebele benchmark: a parallel reading comprehension dataset in 122 language variants. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.16884 (2023).","DOI":"10.18653\/v1\/2024.acl-long.44"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","unstructured":"Haonan Chen Carlos Lassance and Jimmy Lin. 2023. End-to-End Retrieval with Learned Dense and Sparse Representations Using Lucene. ArXiv abs\/2311.18503 (2023). 10.48550\/arXiv.2311.18503","DOI":"10.48550\/arXiv.2311.18503"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Jianlv Chen Shitao Xiao Peitian Zhang Kun Luo Defu Lian and Zheng Liu. 2024. BGE M3-Embedding: Multi-Lingual Multi-Functionality Multi-Granularity Text Embeddings Through Self-Knowledge Distillation. arxiv:https:\/\/arXiv.org\/abs\/2402.03216\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2402.03216","DOI":"10.18653\/v1\/2024.findings-acl.137"},{"key":"e_1_3_3_1_5_2","unstructured":"Jian Chen Peilin Zhou Yining Hua Yingxin Loh Kehui Chen Ziyuan Li Bing Zhu and Junwei Liang. 2024. FinTextQA: A Dataset for Long-form Financial Question Answering. arxiv:https:\/\/arXiv.org\/abs\/2405.09980\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2405.09980"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Zhiyu Chen Wenhu Chen Charese Smiley Sameena Shah Iana Borova Dylan Langdon Reema Moussa Matt Beane Ting-Hao Huang Bryan Routledge et\u00a0al. 2021. Finqa: A dataset of numerical reasoning over financial data. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2109.00122 (2021).","DOI":"10.18653\/v1\/2021.emnlp-main.300"},{"key":"e_1_3_3_1_7_2","first-page":"740","volume-title":"Proceedings of the 31st International Conference on Computational Linguistics","author":"Chen Zhe","year":"2025","unstructured":"Zhe Chen, Pengjie Ren, Fuhui Sun, Xiaoyan Wang, Yujun Li, Siwen Zhao, and Tengyi Yang. 2025. SLARD: A Chinese Superior Legal Article Retrieval Dataset. In Proceedings of the 31st International Conference on Computational Linguistics. 740\u2013754."},{"key":"e_1_3_3_1_8_2","unstructured":"Stephen Choi William Gazeley Siu\u00a0Ho Wong and Tingting Li. 2023. Conversational Financial Information Retrieval Model (ConFIRM). arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.13001 (2023)."},{"key":"e_1_3_3_1_9_2","unstructured":"DeepSeek-AI Daya Guo Dejian Yang Haowei Zhang Junxiao Song Ruoyu Zhang Runxin Xu et\u00a0al. 2025. DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning. arxiv:https:\/\/arXiv.org\/abs\/2501.12948\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2501.12948"},{"key":"e_1_3_3_1_10_2","unstructured":"Laxman Dhulipala Majid Hadian Rajesh Jayaram Jason Lee and Vahab Mirrokni. 2024. MUVERA: Multi-Vector Retrieval via Fixed Dimensional Encodings. arxiv:https:\/\/arXiv.org\/abs\/2405.19504\u00a0[cs.DS] https:\/\/arxiv.org\/abs\/2405.19504"},{"key":"e_1_3_3_1_11_2","unstructured":"Yihao Ding Kaixuan Ren Jiabin Huang Siwen Luo and Soyeon\u00a0Caren Han. 2024. PDF-MVQA: A Dataset for Multimodal Information Retrieval in PDF-based Visual Question Answering. arxiv:https:\/\/arXiv.org\/abs\/2404.12720\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2404.12720"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","unstructured":"Fuli Feng Cheng Luo Xiangnan He Yiqun Liu and Tat-Seng Chua. 2020. FinIR 2020: The First Workshop on Information Retrieval in Finance. (2020) 2451\u20132454. 10.1145\/3397271.3401462","DOI":"10.1145\/3397271.3401462"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","unstructured":"Thibault Formal Carlos Lassance Benjamin Piwowarski and S. Clinchant. 2023. Towards Effective and Efficient Sparse Neural Information Retrieval. ACM Transactions on Information Systems 42 (2023) 1 \u2013 46. 10.1145\/3634912","DOI":"10.1145\/3634912"},{"key":"e_1_3_3_1_14_2","unstructured":"Weiping Fu Bifan Wei Jianxiang Hu Zhongmin Cai and Jun Liu. 2024. QGEval: Benchmarking Multi-dimensional Evaluation for Question Generation. arxiv:https:\/\/arXiv.org\/abs\/2406.05707\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2406.05707"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Mor Geva Daniel Khashabi Elad Segal Tushar Khot Dan Roth and Jonathan Berant. 2021. Did aristotle use a laptop? a question answering benchmark with implicit reasoning strategies. Transactions of the Association for Computational Linguistics 9 (2021) 346\u2013361.","DOI":"10.1162\/tacl_a_00370"},{"key":"e_1_3_3_1_16_2","unstructured":"Xanh Ho Anh-Khoa\u00a0Duong Nguyen Saku Sugawara and Akiko Aizawa. 2020. Constructing a multi-hop QA dataset for comprehensive evaluation of reasoning steps. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2011.01060 (2020)."},{"key":"e_1_3_3_1_17_2","unstructured":"Yewon Hwang Sungbum Jung Hanwool Lee and Sara Yu. 2025. TWICE: What Advantages Can Low-Resource Domain-Specific Embedding Model Bring? \u2013 A Case Study on Korea Financial Texts. arxiv:https:\/\/arXiv.org\/abs\/2502.07131\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2502.07131"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Kalervo J\u00e4rvelin and Jaana Kek\u00e4l\u00e4inen. 2002. Cumulated gain-based evaluation of IR techniques. ACM Transactions on Information Systems (TOIS) 20 4 (2002) 422\u2013446.","DOI":"10.1145\/582415.582418"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591911"},{"key":"e_1_3_3_1_21_2","unstructured":"Omar Khattab and Matei Zaharia. 2020. ColBERT: Efficient and Effective Passage Search via Contextualized Late Interaction over BERT. arxiv:https:\/\/arXiv.org\/abs\/2004.12832\u00a0[cs.IR] https:\/\/arxiv.org\/abs\/2004.12832"},{"key":"e_1_3_3_1_22_2","unstructured":"Seunghee Kim Changhyeon Kim and Taeuk Kim. 2024. FCMR: Robust Evaluation of Financial Cross-Modal Multi-Hop Reasoning. arxiv:https:\/\/arXiv.org\/abs\/2412.12567\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2412.12567"},{"key":"e_1_3_3_1_23_2","unstructured":"Chankyu Lee Rajarshi Roy Mengyao Xu Jonathan Raiman Mohammad Shoeybi Bryan Catanzaro and Wei Ping. 2024. NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.17428 (2024)."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657887"},{"key":"e_1_3_3_1_25_2","unstructured":"Zehan Li Xin Zhang Yanzhao Zhang Dingkun Long Pengjun Xie and Meishan Zhang. 2023. Towards general text embeddings with multi-stage contrastive learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.03281 (2023)."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Yang Liu Dan Iter Yichong Xu Shuohang Wang Ruochen Xu and Chenguang Zhu. 2023. G-Eval: NLG Evaluation using GPT-4 with Better Human Alignment. arxiv:https:\/\/arXiv.org\/abs\/2303.16634\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2303.16634","DOI":"10.18653\/v1\/2023.emnlp-main.153"},{"key":"e_1_3_3_1_27_2","unstructured":"Ehsan Lotfi Nikolay Banar Nerses Yuzbashyan and Walter Daelemans. 2024. Bilingual BSARD: Extending Statutory Article Retrieval to Dutch. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.07462 (2024)."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","unstructured":"Y. Luan Jacob Eisenstein Kristina Toutanova and M. Collins. 2020. Sparse Dense and Attentional Representations for Text Retrieval. Transactions of the Association for Computational Linguistics 9 (2020) 329\u2013345. 10.1162\/tacla00369","DOI":"10.1162\/tacla00369"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3624918.3625334"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463250"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3184558.3192301"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463030"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","unstructured":"Priyanka Mandikal and Raymond Mooney. 2024. Sparse Meets Dense: A Hybrid Approach to Enhance Scientific Document Retrieval. ArXiv abs\/2401.04055 (2024). 10.48550\/arXiv.2401.04055","DOI":"10.48550\/arXiv.2401.04055"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"crossref","unstructured":"Niklas Muennighoff Nouamane Tazi Lo\u00efc Magne and Nils Reimers. 2023. MTEB: Massive Text Embedding Benchmark. arxiv:https:\/\/arXiv.org\/abs\/2210.07316\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2210.07316","DOI":"10.18653\/v1\/2023.eacl-main.148"},{"key":"e_1_3_3_1_35_2","unstructured":"Arvind Neelakantan Tao Xu Raul Puri Alec Radford Jesse\u00a0Michael Han Jerry Tworek Qiming Yuan Nikolas Tezak Jong\u00a0Wook Kim Chris Hallacy Johannes Heidecke Pranav Shyam Boris Power Tyna\u00a0Eloundou Nekoul Girish Sastry Gretchen Krueger David Schnurr Felipe\u00a0Petroski Such Kenny Hsu Madeleine Thompson Tabarak Khan Toki Sherbakov Joanne Jang Peter Welinder and Lilian Weng. 2022. Text and Code Embeddings by Contrastive Pre-Training. arxiv:https:\/\/arXiv.org\/abs\/2201.10005\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2201.10005"},{"key":"e_1_3_3_1_36_2","unstructured":"OpenAI Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman et\u00a0al. 2024. GPT-4 Technical Report. arxiv:https:\/\/arXiv.org\/abs\/2303.08774\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"e_1_3_3_1_37_2","unstructured":"Varshini Reddy Rik Koncel-Kedziorski Viet\u00a0Dac Lai Michael Krumdick Charles Lovering and Chris Tanner. 2024. DocFinQA: A Long-Context Financial Reasoning Dataset. arxiv:https:\/\/arXiv.org\/abs\/2401.06915\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2401.06915"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"crossref","unstructured":"Stephen Robertson Hugo Zaragoza et\u00a0al. 2009. The probabilistic relevance framework: BM25 and beyond. Foundations and Trends\u00ae in Information Retrieval 3 4 (2009) 333\u2013389.","DOI":"10.1561\/1500000019"},{"key":"e_1_3_3_1_39_2","unstructured":"Soumya Sharma Tapas Nayak Arusarka Bose Ajay\u00a0Kumar Meena Koustuv Dasgupta Niloy Ganguly and Pawan Goyal. 2023. FinRED: A Dataset for Relation Extraction in Financial Domain. arxiv:https:\/\/arXiv.org\/abs\/2306.03736\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2306.03736"},{"key":"e_1_3_3_1_40_2","unstructured":"Alon Talmor and Jonathan Berant. 2018. The Web as a Knowledge-base for Answering Complex Questions. arxiv:https:\/\/arXiv.org\/abs\/1803.06643\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1803.06643"},{"key":"e_1_3_3_1_41_2","unstructured":"Nandan Thakur Nils Reimers Andreas R\u00fcckl\u00e9 Abhishek Srivastava and Iryna Gurevych. 2021. Beir: A heterogenous benchmark for zero-shot evaluation of information retrieval models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2104.08663 (2021)."},{"key":"e_1_3_3_1_42_2","unstructured":"Francisco Valentini Viviana Cotik Dami\u00e1n Furman Ivan Bercovich Edgar Altszyler and Juan\u00a0Manuel P\u00e9rez. 2024. MessIRve: A Large-Scale Spanish Information Retrieval Dataset. arxiv:https:\/\/arXiv.org\/abs\/2409.05994\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2409.05994"},{"key":"e_1_3_3_1_43_2","unstructured":"Jianyou Wang Kaicheng Wang Xiaoyue Wang Prudhviraj Naidu Leon Bergen and Ramamohan Paturi. 2023. DORIS-MAE: Scientific Document Retrieval using Multi-level Aspect-based Queries. arxiv:https:\/\/arXiv.org\/abs\/2310.04678\u00a0[cs.IR] https:\/\/arxiv.org\/abs\/2310.04678"},{"key":"e_1_3_3_1_44_2","unstructured":"Jinyuan Wang Hai Zhao Zhong Wang Zeyang Zhu Jinhao Xie Yong Yu Yongjian Fei Yue Huang and Dawei Cheng. 2023. CSPRD: A Financial Policy Retrieval Dataset for Chinese Stock Market. arxiv:https:\/\/arXiv.org\/abs\/2309.04389\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2309.04389"},{"key":"e_1_3_3_1_45_2","unstructured":"Liang Wang Nan Yang Xiaolong Huang Binxing Jiao Linjun Yang Daxin Jiang Rangan Majumder and Furu Wei. 2024. Text Embeddings by Weakly-Supervised Contrastive Pre-training. arxiv:https:\/\/arXiv.org\/abs\/2212.03533\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2212.03533"},{"key":"e_1_3_3_1_46_2","unstructured":"Liang Wang Nan Yang Xiaolong Huang Linjun Yang Rangan Majumder and Furu Wei. 2023. Improving Text Embeddings with Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.00368 (2023)."},{"key":"e_1_3_3_1_47_2","unstructured":"Liang Wang Nan Yang Xiaolong Huang Linjun Yang Rangan Majumder and Furu Wei. 2024. Multilingual E5 Text Embeddings: A Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.05672 (2024)."},{"key":"e_1_3_3_1_48_2","unstructured":"Lee Xiong Chenyan Xiong Ye Li Kwok-Fung Tang Jialin Liu Paul Bennett Junaid Ahmed and Arnold Overwijk. 2020. Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2007.00808 (2020)."},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"crossref","unstructured":"Zhilin Yang Peng Qi Saizheng Zhang Yoshua Bengio William\u00a0W Cohen Ruslan Salakhutdinov and Christopher\u00a0D Manning. 2018. HotpotQA: A dataset for diverse explainable multi-hop question answering. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1809.09600 (2018).","DOI":"10.18653\/v1\/D18-1259"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"crossref","unstructured":"Xinyu Zhang Nandan Thakur Odunayo Ogundepo Ehsan Kamalloo David Alfonso-Hermelo Xiaoguang Li Qun Liu Mehdi Rezagholizadeh and Jimmy Lin. 2023. Miracl: A multilingual retrieval dataset covering 18 diverse languages. Transactions of the Association for Computational Linguistics 11 (2023) 1114\u20131131.","DOI":"10.1162\/tacl_a_00595"},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-short.2"}],"event":{"name":"ICAIF '25: 6th ACM International Conference on AI in Finance","location":"Singapore Singapore","acronym":"ICAIF '25"},"container-title":["Proceedings of the 6th ACM International Conference on AI in Finance"],"original-title":[],"deposited":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T07:30:25Z","timestamp":1763105425000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3768292.3770354"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,14]]},"references-count":50,"alternative-id":["10.1145\/3768292.3770354","10.1145\/3768292"],"URL":"https:\/\/doi.org\/10.1145\/3768292.3770354","relation":{},"subject":[],"published":{"date-parts":[[2025,11,14]]},"assertion":[{"value":"2025-11-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}