{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T21:28:37Z","timestamp":1776115717266,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T00:00:00Z","timestamp":1742860800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,25]]},"DOI":"10.1145\/3709025.3712219","type":"proceedings-article","created":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T18:20:11Z","timestamp":1741890011000},"page":"169-193","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["A Reasoning-Focused Legal Retrieval Benchmark"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8602-0007","authenticated-orcid":false,"given":"Lucia","family":"Zheng","sequence":"first","affiliation":[{"name":"Stanford University, Stanford, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5120-1726","authenticated-orcid":false,"given":"Neel","family":"Guha","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2442-6042","authenticated-orcid":false,"given":"Javokhir","family":"Arifov","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0265-5071","authenticated-orcid":false,"given":"Sarah","family":"Zhang","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9999-2737","authenticated-orcid":false,"given":"Michal","family":"Skreta","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6155-649X","authenticated-orcid":false,"given":"Christopher D.","family":"Manning","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3938-0541","authenticated-orcid":false,"given":"Peter","family":"Henderson","sequence":"additional","affiliation":[{"name":"Princeton University, Princeton, New Jersey, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2195-5469","authenticated-orcid":false,"given":"Daniel E.","family":"Ho","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, California, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,3,25]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Rishi Bommasani Drew A Hudson Ehsan Adeli Russ Altman Simran Arora Sydney von Arx Michael S Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill et al. 2021. On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.305"},{"key":"e_1_3_2_1_3_1","volume-title":"How to Build a Better Bar Exam","author":"Curcio Andrea Anne","year":"2018","unstructured":"Andrea Anne Curcio, Carol L Chomsky, and Eileen R Kaufman. 2018. How to Build a Better Bar Exam. New York State Bar Association Journal (2018), 37--41."},{"key":"e_1_3_2_1_4_1","volume-title":"Large legal fictions: Profiling legal hallucinations in large language models. arXiv preprint arXiv:2401.01301","author":"Dahl Matthew","year":"2024","unstructured":"Matthew Dahl, Varun Magesh, Mirac Suzgun, and Daniel E Ho. 2024. Large legal fictions: Profiling legal hallucinations in large language models. arXiv preprint arXiv:2401.01301 (2024)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"e_1_3_2_1_6_1","volume-title":"Overview of Benchmark Datasets and Methods for the Legal Information Extraction\/Entailment Competition (COLIEE)","author":"Goebel Randy","year":"2024","unstructured":"Randy Goebel, Yoshinobu Kano, Mi-Young Kim, Juliano Rabelo, Ken Satoh, and Masaharu Yoshioka. 2024. Overview of Benchmark Datasets and Methods for the Legal Information Extraction\/Entailment Competition (COLIEE) 2024. In New Frontiers in Artificial Intelligence, Toyotaro Suzumura and Mayumi Bono (Eds.). Springer Nature Singapore, Singapore, 109--124."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2746090.2746096"},{"key":"e_1_3_2_1_8_1","volume-title":"Brandon Waldon, Daniel Rockmore, Diego Zambrano, et al.","author":"Guha Neel","year":"2024","unstructured":"Neel Guha, Julian Nyarko, Daniel Ho, Christopher R\u00e9, Adam Chilton, Alex Chohlas-Wood, Austin Peters, Brandon Waldon, Daniel Rockmore, Diego Zambrano, et al. 2024. Legalbench: A collaboratively built benchmark for measuring legal reasoning in large language models. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2021. Measuring Massive Multitask Language Understanding. Proceedings of the International Conference on Learning Representations (ICLR) (2021)."},{"key":"e_1_3_2_1_10_1","volume-title":"A dataset for statutory reasoning in tax law entailment and question answering. arXiv preprint arXiv:2005.05257","author":"Holzenberger Nils","year":"2020","unstructured":"Nils Holzenberger, Andrew Blair-Stanek, and Benjamin Van Durme. 2020. A dataset for statutory reasoning in tax law entailment and question answering. arXiv preprint arXiv:2005.05257 (2020)."},{"key":"e_1_3_2_1_11_1","volume-title":"CLERC: A Dataset for Legal Case Retrieval and Retrieval-Augmented Analysis Generation. arXiv preprint arXiv:2406.17186","author":"Hou Abe Bohan","year":"2024","unstructured":"Abe Bohan Hou, Orion Weller, Guanghui Qin, Eugene Yang, Dawn Lawrie, Nils Holzenberger, Andrew Blair-Stanek, and Benjamin Van Durme. 2024. CLERC: A Dataset for Legal Case Retrieval and Retrieval-Augmented Analysis Generation. arXiv preprint arXiv:2406.17186 (2024)."},{"key":"e_1_3_2_1_12_1","volume-title":"Query expansion by prompting large language models. arXiv preprint arXiv:2305.03653","author":"Jagerman Rolf","year":"2023","unstructured":"Rolf Jagerman, Honglei Zhuang, Zhen Qin, Xuanhui Wang, and Michael Bendersky. 2023. Query expansion by prompting large language models. arXiv preprint arXiv:2305.03653 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"Mill: Mutual verification with large language models for zero-shot query expansion. arXiv preprint arXiv:2310.19056","author":"Jia Pengyue","year":"2023","unstructured":"Pengyue Jia, Yiding Liu, Xiangyu Zhao, Xiaopeng Li, Changying Hao, Shuaiqiang Wang, and Dawei Yin. 2023. Mill: Mutual verification with large language models for zero-shot query expansion. arXiv preprint arXiv:2310.19056 (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al.","author":"Jiang Albert Q","year":"2023","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1147"},{"key":"e_1_3_2_1_16_1","article-title":"Gpt-4 passes the bar exam","volume":"382","author":"Katz Daniel Martin","year":"2024","unstructured":"Daniel Martin Katz, Michael James Bommarito, Shang Gao, and Pablo Arredondo. 2024. Gpt-4 passes the bar exam. Philosophical Transactions of the Royal Society A 382, 2270 (2024), 20230254.","journal-title":"Philosophical Transactions of the Royal Society A"},{"key":"e_1_3_2_1_17_1","volume-title":"Natural Questions: a Benchmark for Question Answering Research. Transactions of the Association of Computational Linguistics","author":"Kwiatkowski Tom","year":"2019","unstructured":"Tom Kwiatkowski, Jennimaria Palomaki, Olivia Redfield, Michael Collins, Ankur Parikh, Chris Alberti, Danielle Epstein, Illia Polosukhin, Matthew Kelcey, Jacob Devlin, Kenton Lee, Kristina N. Toutanova, Llion Jones, Ming-Wei Chang, Andrew Dai, Jakob Uszkoreit, Quoc Le, and Slav Petrov. 2019. Natural Questions: a Benchmark for Question Answering Research. Transactions of the Association of Computational Linguistics (2019)."},{"key":"e_1_3_2_1_18_1","volume-title":"Wortman Vaughan (Eds.)","volume":"34","author":"Lazaridou Angeliki","year":"2021","unstructured":"Angeliki Lazaridou, Adhi Kuncoro, Elena Gribovskaya, Devang Agrawal, Adam Liska, Tayfun Terzi, Mai Gimenez, Cyprien de Masson d'Autume, Tomas Kocisky, Sebastian Ruder, Dani Yogatama, Kris Cao, Susannah Young, and Phil Blunsom. 2021. Mind the Gap: Assessing Temporal Generalization in Neural Language Models. In Advances in Neural Information Processing Systems, M. Ranzato, A. Beygelzimer, Y. Dauphin, P.S. Liang, and J. Wortman Vaughan (Eds.), Vol. 34. Curran Associates, Inc., 29348--29363. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/f5bf0ba0a17ef18f9607774722f5698c-Paper.pdf"},{"key":"e_1_3_2_1_19_1","volume-title":"Corpus-Steered Query Expansion with Large Language Models. arXiv preprint arXiv:2402.18031","author":"Lei Yibin","year":"2024","unstructured":"Yibin Lei, Yu Cao, Tianyi Zhou, Tao Shen, and Andrew Yates. 2024. Corpus-Steered Query Expansion with Large Language Models. arXiv preprint arXiv:2402.18031 (2024)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308774.3308781"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.468"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i20.30232"},{"key":"e_1_3_2_1_23_1","unstructured":"LSC. 2021. Eviction Laws Database: Local Dataset. Prepared by the Center for Public Health Law Research at Temple University's Beasley School of Law for Legal Services Corporation. https:\/\/www.lsc.gov\/initiatives\/effect-state-local-laws-evictions\/lsc-eviction-laws-database."},{"key":"e_1_3_2_1_24_1","volume-title":"Generative AI Legal Landscape","author":"Ma Megan","year":"2024","unstructured":"Megan Ma, Aparna Sinha, Ankit Tandon, and Jennifer Richards. 2024. Generative AI Legal Landscape 2024. Technical Report. Technical report."},{"key":"e_1_3_2_1_25_1","volume-title":"Ho","author":"Magesh Varun","year":"2024","unstructured":"Varun Magesh, Faiz Surani, Matthew Dahl, Mirac Suzgun, Christopher D. Manning, and Daniel E. Ho. 2024. Hallucination-Free? Assessing the Reliability of Leading AI Legal Research Tools. arXiv:2405.20362"},{"key":"e_1_3_2_1_26_1","volume-title":"LePaRD: A Large-Scale Dataset of Judges Citing Precedents. arXiv preprint arXiv:2311.09356","author":"Mahari Robert","year":"2023","unstructured":"Robert Mahari, Dominik Stammbach, Elliott Ash, and AlexSandy' Pentland. 2023. LePaRD: A Large-Scale Dataset of Judges Citing Precedents. arXiv preprint arXiv:2311.09356 (2023)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3184558.3192301"},{"key":"e_1_3_2_1_28_1","volume-title":"A More Realistic Bar Exam Will Benefit Legal Education. The Bar Examiner 92, 2","author":"McFarlin Timothy","year":"2023","unstructured":"Timothy McFarlin. 2023. A More Realistic Bar Exam Will Benefit Legal Education. The Bar Examiner 92, 2 (2023)."},{"key":"e_1_3_2_1_29_1","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. (2019)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1002\/asi.4630270302"},{"key":"e_1_3_2_1_31_1","volume-title":"Roberto Lotufo, and Rodrigo Nogueira.","author":"Rosa Guilherme Moraes","year":"2021","unstructured":"Guilherme Moraes Rosa, Ruan Chaves Rodrigues, Roberto Lotufo, and Rodrigo Nogueira. 2021. Yes, bm25 is a strong baseline for legal case retrieval. arXiv preprint arXiv:2105.05686 (2021)."},{"key":"e_1_3_2_1_32_1","volume-title":"Benchmarking and building long-context retrieval models with loco and m2-bert. arXiv preprint arXiv:2402.07440","author":"Saad-Falcon Jon","year":"2024","unstructured":"Jon Saad-Falcon, Daniel Y Fu, Simran Arora, Neel Guha, and Christopher R\u00e9. 2024. Benchmarking and building long-context retrieval models with loco and m2-bert. arXiv preprint arXiv:2402.07440 (2024)."},{"key":"e_1_3_2_1_33_1","volume-title":"Legal information retrieval for understanding statutory terms. Artificial Intelligence and Law","author":"\u0160avelka Jarom\u00edr","year":"2022","unstructured":"Jarom\u00edr \u0160avelka and Kevin D Ashley. 2022. Legal information retrieval for understanding statutory terms. Artificial Intelligence and Law (2022), 1--45."},{"key":"e_1_3_2_1_34_1","volume-title":"BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval. arXiv preprint arXiv:2407.12883","author":"Su Hongjin","year":"2024","unstructured":"Hongjin Su, Howard Yen, Mengzhou Xia, Weijia Shi, Niklas Muennighoff, Han-yu Wang, Haisu Liu, Quan Shi, Zachary S Siegel, Michael Tang, et al. 2024. BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval. arXiv preprint arXiv:2407.12883 (2024)."},{"key":"e_1_3_2_1_35_1","unstructured":"Nandan Thakur Nils Reimers Andreas R\u00fcckl\u00e9 Abhishek Srivastava and Iryna Gurevych. 2021. BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2). https:\/\/openreview.net\/forum?id=wCu6T5xFjeJ"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024","author":"Santosh","year":"2024","unstructured":"Santosh T.y.s.s., Rashid Haddad, and Matthias Grabmair. 2024. ECtHR-PCR: A Dataset for Precedent Understanding and Prior Case Retrieval in the European Court of Human Rights. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), Nicoletta Calzolari, Min-Yen Kan, Veronique Hoste, Alessandro Lenci, Sakriani Sakti, and Nianwen Xue (Eds.). ELRA and ICCL, Torino, Italia, 5473--5483. https:\/\/aclanthology.org\/2024.lrec-main.486"},{"key":"e_1_3_2_1_37_1","volume-title":"Text embeddings by weakly-supervised contrastive pre-training. arXiv preprint arXiv:2212.03533","author":"Wang Liang","year":"2022","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang, Rangan Majumder, and Furu Wei. 2022. Text embeddings by weakly-supervised contrastive pre-training. arXiv preprint arXiv:2212.03533 (2022)."},{"key":"e_1_3_2_1_38_1","volume-title":"Improving text embeddings with large language models. arXiv preprint arXiv:2401.00368","author":"Wang Liang","year":"2023","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Linjun Yang, Rangan Majumder, and Furu Wei. 2023. Improving text embeddings with large language models. arXiv preprint arXiv:2401.00368 (2023)."},{"key":"e_1_3_2_1_39_1","volume-title":"Advances in Neural Information Processing Systems","author":"Wang Wenhui","year":"2020","unstructured":"Wenhui Wang, Furu Wei, Li Dong, Hangbo Bao, Nan Yang, and Ming Zhou. 2020. MiniLM: Deep Self-Attention Distillation for Task-Agnostic Compression of Pre-Trained Transformers. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 5776--5788. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_2_1_40_1","volume-title":"BIRCO: A Benchmark of Information Retrieval Tasks with Complex Objectives. arXiv preprint arXiv:2402.14151","author":"Wang Xiaoyue","year":"2024","unstructured":"Xiaoyue Wang, Jianyou Wang, Weili Cao, Kaicheng Wang, Ramamohan Paturi, and Leon Bergen. 2024. BIRCO: A Benchmark of Information Retrieval Tasks with Complex Objectives. arXiv preprint arXiv:2402.14151 (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"Explainable Multi-hop Question Answering. In Conference on Empirical Methods in Natural Language Processing (EMNLP).","author":"Yang Zhilin","unstructured":"Zhilin Yang, Peng Qi, Saizheng Zhang, Yoshua Bengio, William W. Cohen, Ruslan Salakhutdinov, and Christopher D. Manning. 2018. HotpotQA: A Dataset for Diverse, Explainable Multi-hop Question Answering. In Conference on Empirical Methods in Natural Language Processing (EMNLP)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3462757.3466088"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6519"}],"event":{"name":"CSLAW '25: Symposium on Computer Science and Law","location":"Munich Germany","acronym":"CSLAW '25","sponsor":["ACM Association for Computing Machinery"]},"container-title":["Proceedings of the Symposium on Computer Science and Law on ZZZ"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3709025.3712219","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3709025.3712219","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,26]],"date-time":"2025-08-26T19:48:07Z","timestamp":1756237687000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3709025.3712219"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,25]]},"references-count":43,"alternative-id":["10.1145\/3709025.3712219","10.1145\/3709025"],"URL":"https:\/\/doi.org\/10.1145\/3709025.3712219","relation":{},"subject":[],"published":{"date-parts":[[2025,3,25]]},"assertion":[{"value":"2025-03-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}