{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T07:31:59Z","timestamp":1772695919513,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3680082","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:11Z","timestamp":1729452851000},"page":"4922-4930","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["\"Reasoning before Responding\": Towards Legal Long-form Question Answering with Interpretability"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-1590-2466","authenticated-orcid":false,"given":"Utkarsh","family":"Ujwal","sequence":"first","affiliation":[{"name":"University of Liverpool, Liverpool, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4358-3184","authenticated-orcid":false,"given":"Sai Sri Harsha","family":"Surampudi","sequence":"additional","affiliation":[{"name":"University of Liverpool, Liverpool, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8140-6499","authenticated-orcid":false,"given":"Sayantan","family":"Mitra","sequence":"additional","affiliation":[{"name":"JPMorgan Chase &amp; Co., Bangalore, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3252-0997","authenticated-orcid":false,"given":"Tulika","family":"Saha","sequence":"additional","affiliation":[{"name":"University of Liverpool, Liverpool, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114920"},{"key":"e_1_3_2_1_3_1","volume-title":"capability and the experience of rights problems","author":"Balmer Nigel J","year":"2010","unstructured":"Nigel J Balmer, Alexy Buck, Ash Patel, Catrina Denvir, and Pascoe Pleasence. 2010. Knowledge, capability and the experience of rights problems. London: PLEnet (2010)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-15712-8_27"},{"key":"e_1_3_2_1_5_1","volume-title":"Shivanshu Purohit, USVSN Sai Prashanth, Edward Raff, Aviya Skowron, Lintang Sutawika, and Oskar van der Wal.","author":"Biderman Stella","year":"2023","unstructured":"Stella Biderman, Hailey Schoelkopf, Quentin Anthony, Herbie Bradley, Kyle O'Brien, Eric Hallahan, Mohammad Aflah Khan, Shivanshu Purohit, USVSN Sai Prashanth, Edward Raff, Aviya Skowron, Lintang Sutawika, and Oskar van der Wal. 2023. Pythia: A Suite for Analyzing Large Language Models Across Training and Scaling. arxiv: 2304.01373 [cs.CL]"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo"},{"key":"e_1_3_2_1_7_1","unstructured":"Vladimir Blagojevic. 2022. Long-Form QA beyond ELI5: an updated dataset and approach. towardsdatascience.com\/long-form-qa-beyond-eli5-an-updated-dataset-and-approach-319cb841aabb"},{"key":"e_1_3_2_1_8_1","volume-title":"Manning","author":"Bowman Samuel R.","year":"2015","unstructured":"Samuel R. Bowman, Gabor Angeli, Christopher Potts, and Christopher D. Manning. 2015. A large annotated corpus for learning natural language inference. CoRR, Vol. abs\/1508.05326 (2015). showeprint[arXiv]1508.05326 http:\/\/arxiv.org\/abs\/1508.05326"},{"key":"e_1_3_2_1_9_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, and Amanda Askell. 2020. Language models are few-shot learners. Advances in neural information processing systems, Vol. 33 (2020), 1877--1901."},{"key":"e_1_3_2_1_10_1","volume-title":"LEGAL-BERT: The muppets straight out of law school. arXiv preprint arXiv:2010.02559","author":"Chalkidis Ilias","year":"2020","unstructured":"Ilias Chalkidis, Manos Fergadiotis, Prodromos Malakasiotis, Nikolaos Aletras, and Ion Androutsopoulos. 2020. LEGAL-BERT: The muppets straight out of law school. arXiv preprint arXiv:2010.02559 (2020)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3594536.3595159"},{"key":"e_1_3_2_1_12_1","volume-title":"Free Dolly: Introducing the World's First Truly Open Instruction-Tuned LLM. https:\/\/www.databricks.com\/blog\/2023\/04\/12\/dolly-first-open-commercially-viable-instruction-tuned-llm","author":"Conover Mike","year":"2023","unstructured":"Mike Conover, Matt Hayes, Ankit Mathur, Jianwei Xie, Jun Wan, Sam Shah, Ali Ghodsi, Patrick Wendell, Matei Zaharia, and Reynold Xin. 2023. Free Dolly: Introducing the World's First Truly Open Instruction-Tuned LLM. https:\/\/www.databricks.com\/blog\/2023\/04\/12\/dolly-first-open-commercially-viable-instruction-tuned-llm"},{"key":"e_1_3_2_1_13_1","volume-title":"Access to justice.","author":"Currie Ab","unstructured":"Ab Currie. 2009. The legal problems of everyday life. In Access to justice. Vol. 12. Emerald Group Publishing Limited, 1--41."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compedu.2015.10.003"},{"key":"e_1_3_2_1_15_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv pre-print server","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv pre-print server (2019). https:\/\/doi.org\/None arxiv:1810.04805"},{"key":"e_1_3_2_1_16_1","volume-title":"A survey on in-context learning. arXiv preprint arXiv:2301.00234","author":"Dong Qingxiu","year":"2022","unstructured":"Qingxiu Dong, Lei Li, Damai Dai, Ce Zheng, Zhiyong Wu, Baobao Chang, Xu Sun, Jingjing Xu, and Zhifang Sui. 2022. A survey on in-context learning. arXiv preprint arXiv:2301.00234 (2022)."},{"key":"e_1_3_2_1_17_1","volume-title":"ELI5: Long Form Question Answering. arXiv pre-print server","author":"Fan Angela","year":"2019","unstructured":"Angela Fan, Yacine Jernite, Ethan Perez, David Grangier, Jason Weston, and Michael Auli. 2019. ELI5: Long Form Question Answering. arXiv pre-print server (2019). https:\/\/doi.org\/None arxiv:1907.09190"},{"key":"e_1_3_2_1_18_1","volume-title":"Everyday legal problems and the cost of justice in Canada: Overview report. Osgoode Legal Studies Research Paper 57","author":"Farrow Trevor CW","year":"2016","unstructured":"Trevor CW Farrow, Ab Currie, Nicole Aylwin, Lesley Jacobs, David Northrup, and Lisa Moore. 2016. Everyday legal problems and the cost of justice in Canada: Overview report. Osgoode Legal Studies Research Paper 57 (2016)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1177\/09646639221090132"},{"key":"e_1_3_2_1_20_1","volume-title":"Stanford Alpaca: An Instruction-following LLaMA model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca","author":"Hashimoto Rohan Taori","year":"2023","unstructured":"Rohan Taori Hashimoto, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, and Tatsunori B. 2023. Stanford Alpaca: An Instruction-following LLaMA model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca"},{"key":"e_1_3_2_1_21_1","volume-title":"A dataset for statutory reasoning in tax law entailment and question answering. arXiv preprint arXiv:2005.05257","author":"Holzenberger Nils","year":"2020","unstructured":"Nils Holzenberger, Andrew Blair-Stanek, and Benjamin Van Durme. 2020. A dataset for statutory reasoning in tax law entailment and question answering. arXiv preprint arXiv:2005.05257 (2020)."},{"key":"e_1_3_2_1_22_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.3233\/FAIA230388"},{"key":"e_1_3_2_1_25_1","volume-title":"Hurdles to Progress in Long-form Question Answering. arXiv pre-print server","author":"Krishna Kalpesh","year":"2021","unstructured":"Kalpesh Krishna, Aurko Roy, and Mohit Iyyer. 2021. Hurdles to Progress in Long-form Question Answering. arXiv pre-print server (2021). https:\/\/doi.org\/None arxiv:2103.06332"},{"key":"e_1_3_2_1_26_1","volume-title":"pre-trained GPT-J models with Pile of Law. arXiv pre-print server","author":"Lee Jieh-Sheng","year":"2023","unstructured":"Jieh-Sheng Lee. 2023. LexGPT 0.1: pre-trained GPT-J models with Pile of Law. arXiv pre-print server (2023). https:\/\/doi.org\/None arxiv:2306.05431"},{"key":"e_1_3_2_1_27_1","volume-title":"pre-trained GPT-J models with Pile of Law. arXiv preprint arXiv:2306.05431","author":"Lee Jieh-Sheng","year":"2023","unstructured":"Jieh-Sheng Lee. 2023. LexGPT 0.1: pre-trained GPT-J models with Pile of Law. arXiv preprint arXiv:2306.05431 (2023)."},{"key":"e_1_3_2_1_28_1","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, et al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems, Vol. 33 (2020), 9459--9474.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_29_1","unstructured":"Chin-Yew Lin. [n. d.]. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81."},{"key":"e_1_3_2_1_30_1","volume-title":"What Makes Good In-Context Examples for GPTarXiv preprint arXiv:2101.06804","author":"Liu Jiachang","year":"2021","unstructured":"Jiachang Liu, Dinghan Shen, Yizhe Zhang, Bill Dolan, Lawrence Carin, and Weizhu Chen. 2021. What Makes Good In-Context Examples for GPTarXiv preprint arXiv:2101.06804 (2021)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i20.30232"},{"key":"e_1_3_2_1_32_1","volume-title":"Falqu: Finding answers to legal questions. arXiv preprint arXiv:2304.05611","author":"Mansouri Behrooz","year":"2023","unstructured":"Behrooz Mansouri and Ricardo Campos. 2023. Falqu: Finding answers to legal questions. arXiv preprint arXiv:2304.05611 (2023)."},{"key":"e_1_3_2_1_33_1","volume-title":"PEFT: State-of-the-art Parameter-Efficient Fine-Tuning methods. https:\/\/github.com\/huggingface\/peft","author":"Paul Sourab Mangrulkar","year":"2022","unstructured":"Sourab Mangrulkar Paul, Sylvain Gugger, Lysandre Debut, Younes Belkada, and Sayak. 2022. PEFT: State-of-the-art Parameter-Efficient Fine-Tuning methods. https:\/\/github.com\/huggingface\/peft"},{"key":"e_1_3_2_1_34_1","volume-title":"KILT: a Benchmark for Knowledge Intensive Language Tasks. arXiv pre-print server","author":"Petroni Fabio","year":"2020","unstructured":"Fabio Petroni, Aleksandra Piktus, Angela Fan, Patrick Lewis, Majid Yazdani, Nicola, James Thorne, Yacine Jernite, Vassilis Plachouras, Tim Rockt\"aschel, and Sebastian Riedel. 2020. KILT: a Benchmark for Knowledge Intensive Language Tasks. arXiv pre-print server (2020). https:\/\/doi.org\/None arxiv:2009.02252"},{"key":"e_1_3_2_1_35_1","volume-title":"Elizabeth Andersen, Camilo Gutierrez Patino, Matthew Harman, Jorge A Morales, Ted Piccone, Natalia Rodriguez Cajamarca, Adriana Stephan, Kirssy Gonzalez, et al.","author":"Ponce Alejandro","year":"2019","unstructured":"Alejandro Ponce, Sarah Chamness Long, Elizabeth Andersen, Camilo Gutierrez Patino, Matthew Harman, Jorge A Morales, Ted Piccone, Natalia Rodriguez Cajamarca, Adriana Stephan, Kirssy Gonzalez, et al. 2019. Global Insights on Access to Justice 2019: Findings from the World Justice Project General Population Poll in 101 Countries. World Justice Project (2019), 1."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-08-097086-8.86161-9"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12626-022-00105-z"},{"key":"e_1_3_2_1_38_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. 2019. Language models are unsupervised multitask learners. OpenAI blog, Vol. 1, 8 (2019), 9."},{"key":"e_1_3_2_1_39_1","volume-title":"Question answering for privacy policies: Combining computational and legal perspectives. arXiv preprint arXiv:1911.00841","author":"Ravichander Abhilasha","year":"2019","unstructured":"Abhilasha Ravichander, Alan W Black, Shomir Wilson, Thomas Norton, and Norman Sadeh. 2019. Question answering for privacy policies: Combining computational and legal perspectives. arXiv preprint arXiv:1911.00841 (2019)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.174"},{"key":"e_1_3_2_1_41_1","volume-title":"Interpreting deep learning models in natural language processing: A review. arXiv preprint arXiv:2110.10470","author":"Sun Xiaofei","year":"2021","unstructured":"Xiaofei Sun, Diyi Yang, Xiaoya Li, Tianwei Zhang, Yuxian Meng, Han Qiu, Guoyin Wang, Eduard Hovy, and Jiwei Li. 2021. Interpreting deep learning models in natural language processing: A review. arXiv preprint arXiv:2110.10470 (2021)."},{"key":"e_1_3_2_1_42_1","volume-title":"MobiLlama: Towards Accurate and Lightweight Fully Transparent GPT. arXiv preprint arXiv:2402.16840","author":"Thawakar Omkar","year":"2024","unstructured":"Omkar Thawakar, Ashmal Vayani, Salman Khan, Hisham Cholakal, Rao M Anwer, Michael Felsberg, Tim Baldwin, Eric P Xing, and Fahad Shahbaz Khan. 2024. MobiLlama: Towards Accurate and Lightweight Fully Transparent GPT. arXiv preprint arXiv:2402.16840 (2024)."},{"key":"e_1_3_2_1_43_1","volume-title":"Symptoms are known by their companies: towards association guided disease diagnosis assistant. BMC bioinformatics","author":"Tiwari Abhisek","year":"2022","unstructured":"Abhisek Tiwari, Tulika Saha, Sriparna Saha, Pushpak Bhattacharyya, Shemim Begum, Minakshi Dhar, and Sarbajeet Tiwari. 2022. Symptoms are known by their companies: towards association guided disease diagnosis assistant. BMC bioinformatics, Vol. 23, 1 (2022), 556."},{"key":"e_1_3_2_1_44_1","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar Aurelien Rodriguez Armand Joulin Edouard Grave and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. arxiv: 2302.13971 [cs.CL]"},{"key":"e_1_3_2_1_45_1","volume-title":"Legal prompt engineering for multilingual legal judgement prediction. arXiv preprint arXiv:2212.02199","author":"Trautmann Dietrich","year":"2022","unstructured":"Dietrich Trautmann, Alina Petrova, and Frank Schilder. 2022. Legal prompt engineering for multilingual legal judgement prediction. arXiv preprint arXiv:2212.02199 (2022)."},{"key":"e_1_3_2_1_46_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_47_1","unstructured":"Vectara. 2023. Hallucination Evaluation Model. https:\/\/huggingface.co\/vectara\/hallucination_evaluation_model. Accessed: 2024-05--20."},{"key":"e_1_3_2_1_48_1","unstructured":"Ben Wang. 2021. Mesh-Transformer-JAX: Model-Parallel Implementation of Transformer Language Model with JAX. https:\/\/github.com\/kingoflolz\/mesh-transformer-jax"},{"key":"e_1_3_2_1_49_1","unstructured":"Ben Wang and Aran Komatsuzaki. 2021. GPT-J-6B: A 6 Billion Parameter Autoregressive Language Model. https:\/\/github.com\/kingoflolz\/mesh-transformer-jax."},{"key":"e_1_3_2_1_50_1","volume-title":"Chi, Quoc Le, and Denny Zhou","author":"Wei Jason","year":"2023","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Brian Ichter, Fei Xia, Ed Chi, Quoc Le, and Denny Zhou. 2023. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. arxiv: 2201.11903 [cs.CL]"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1101"},{"key":"e_1_3_2_1_52_1","volume-title":"Tinyllama: An open-source small language model. arXiv preprint arXiv:2401.02385","author":"Zhang Peiyuan","year":"2024","unstructured":"Peiyuan Zhang, Guangtao Zeng, Tianduo Wang, and Wei Lu. 2024. Tinyllama: An open-source small language model. arXiv preprint arXiv:2401.02385 (2024)."},{"key":"e_1_3_2_1_53_1","volume-title":"Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675","author":"Zhang Tianyi","year":"2019","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q Weinberger, and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675 (2019)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","unstructured":"Lucia Zheng Neel Guha Brandon R. Anderson Peter Henderson and Daniel E. Ho. [n. d.]. When does pretraining help? ACM. https:\/\/doi.org\/10.1145\/3462757.3466088","DOI":"10.1145\/3462757.3466088"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6519"}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3680082","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3680082","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:18Z","timestamp":1750294698000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3680082"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":55,"alternative-id":["10.1145\/3627673.3680082","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3680082","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}