{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T02:45:18Z","timestamp":1781318718881,"version":"3.54.1"},"reference-count":50,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100013804","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100013804","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004735","name":"Hunan Provincial Natural Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004735","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.knosys.2026.116267","type":"journal-article","created":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T15:57:52Z","timestamp":1779897472000},"page":"116267","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["Automated data synthesis and retrieval-augmented generation for legal large language models"],"prefix":"10.1016","volume":"347","author":[{"given":"Wenqi","family":"Ren","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lixing","family":"Shen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiang","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shaohui","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yinxia","family":"Hong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6601-2958","authenticated-orcid":false,"given":"Jiawei","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2611-2559","authenticated-orcid":false,"given":"Da","family":"Cao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.knosys.2026.116267_b1","series-title":"The 39th Annual International Conference of the IEEE Engineering in Medicine and Biology Society","first-page":"471","article-title":"Rotational data augmentation for electroencephalographic data","author":"Krell","year":"2017"},{"key":"10.1016\/j.knosys.2026.116267_b2","series-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"113","article-title":"AutoAugment: Learning augmentation strategies from data","author":"Cubuk","year":"2019"},{"key":"10.1016\/j.knosys.2026.116267_b3","series-title":"2020 International Conference on Computer Communication and Network Security","first-page":"191","article-title":"A survey of text data augmentation","author":"Liu","year":"2020"},{"issue":"9","key":"10.1016\/j.knosys.2026.116267_b4","doi-asserted-by":"crossref","first-page":"2917","DOI":"10.1109\/TCSVT.2019.2935128","article-title":"Data augmentation using random image cropping and patching for deep CNNs","volume":"30","author":"Takahashi","year":"2020","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.knosys.2026.116267_b5","doi-asserted-by":"crossref","unstructured":"H. Dai, Z. Liu, W. Liao, X. Huang, Y. Cao, Z. Wu, L. Zhao, S. Xu, F. Zeng, W. Liu, N. Liu, S. Li, D. Zhu, H. Cai, L. Sun, Q. Li, D. Shen, T. Liu, X. Li, AugGPT: Leveraging ChatGPT for Text Data Augmentation, IEEE Trans. Big Data 11 (3) 907\u2013918, 2025.","DOI":"10.1109\/TBDATA.2025.3536934"},{"key":"10.1016\/j.knosys.2026.116267_b6","series-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics, ACL 2024 - Student Research Workshop, Bangkok, Thailand,August 11-16, 2024","first-page":"411","article-title":"Can LLMs augment low-resource reading comprehension datasets? opportunities and challenges","author":"Samuel","year":"2024"},{"key":"10.1016\/j.knosys.2026.116267_b7","series-title":"The 11th International Conference on Learning Representations","article-title":"Planning with large language models for code generation","author":"Zhang","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b8","series-title":"BigTranslate: Augmenting large language models with multilingual translation capability over 100 languages","author":"Yang","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b9","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1162\/tacl_a_00632","article-title":"Benchmarking large language models for news summarization","volume":"12","author":"Zhang","year":"2024","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"10.1016\/j.knosys.2026.116267_b10","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9579","article-title":"LISA: Reasoning segmentation via large language model","author":"Lai","year":"2024"},{"issue":"12","key":"10.1016\/j.knosys.2026.116267_b11","doi-asserted-by":"crossref","first-page":"6074","DOI":"10.1109\/JBHI.2023.3316750","article-title":"Large AI models in health informatics: Applications, challenges, and the future","volume":"27","author":"Qiu","year":"2023","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"10.1016\/j.knosys.2026.116267_b12","series-title":"Findings of the Association for Computational Linguistics","first-page":"13064","article-title":"FinTral: A family of GPT-4 level multimodal financial large language models","author":"Bhatia","year":"2024"},{"key":"10.1016\/j.knosys.2026.116267_b13","series-title":"A survey on data synthesis and augmentation for large language models","author":"Wang","year":"2024"},{"key":"10.1016\/j.knosys.2026.116267_b14","series-title":"ChatGPT-4 outperforms experts and crowd workers in annotating political Twitter messages with zero-shot learning","author":"T\u00f6rnberg","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b15","series-title":"Findings of the Association for Computational Linguistics: ACL 2023","first-page":"8003","article-title":"Distilling step-by-step! outperforming larger language models with less training data and smaller model sizes","author":"Hsieh","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b16","series-title":"Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing","first-page":"1487","article-title":"Coannotating: Uncertainty-guided work allocation between human and large language models for data annotation","author":"Li","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b17","series-title":"A survey on knowledge distillation of large language models","author":"Xu","year":"2024"},{"key":"10.1016\/j.knosys.2026.116267_b18","series-title":"Textbooks are all you need","author":"Gunasekar","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b19","series-title":"Orca 2: Teaching small language models how to reason","author":"Mitra","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b20","series-title":"Advances in Neural Information Processing Systems","article-title":"DDK: Distilling domain knowledge for efficient large language models","author":"Liu","year":"2024"},{"key":"10.1016\/j.knosys.2026.116267_b21","series-title":"The 25th International Conference on Intelligent Transportation Systems","first-page":"3893","article-title":"Domain knowledge distillation from large language model: An empirical study in the autonomous driving domain","author":"Tang","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b22","series-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics","first-page":"13484","article-title":"Self-instruct: Aligning language models with self-generated instructions","author":"Wang","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b23","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"3","key":"10.1016\/j.knosys.2026.116267_b24","first-page":"1","article-title":"Aligning crowd-sourced human feedback for reinforcement learning on code generation by large language models","volume":"11","author":"Wong","year":"2025","journal-title":"IEEE Trans. Big Data"},{"key":"10.1016\/j.knosys.2026.116267_b25","series-title":"Proceedings of the International Conference on Intelligent Systems and Data Science","first-page":"271","article-title":"A practical approach to leverage knowledge graphs for legal query","author":"Dang","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b26","series-title":"Recent Challenges in Intelligent Information and Database Systems","first-page":"161","article-title":"Knowledge graph-based legal query system with LLM and retrieval augmented generation","volume":"vol. 2494","author":"Dung","year":"2025"},{"issue":"11","key":"10.1016\/j.knosys.2026.116267_b27","doi-asserted-by":"crossref","first-page":"3148","DOI":"10.1109\/TAI.2025.3567369","article-title":"TrumorGPT: Graph-based retrieval-augmented large language model for fact-checking","volume":"6","author":"Hang","year":"2025","journal-title":"IEEE Trans. Artif. Intell."},{"key":"10.1016\/j.knosys.2026.116267_b28","series-title":"Legal Knowledge and Information Systems (JURIX 2024)","first-page":"60","article-title":"Using LLMs to discover legal factors","author":"Gray","year":"2024"},{"key":"10.1016\/j.knosys.2026.116267_b29","doi-asserted-by":"crossref","first-page":"18253","DOI":"10.1109\/ACCESS.2025.3533217","article-title":"Exploring LLMs applications in law: A literature review on current legal NLP approaches","volume":"13","author":"Siino","year":"2025","journal-title":"IEEE Access"},{"key":"10.1016\/j.knosys.2026.116267_b30","doi-asserted-by":"crossref","first-page":"180","DOI":"10.1186\/s40537-024-01046-w","article-title":"Big data and AI-driven evidence analysis: a global perspective on citation trends, accessibility, and future research in legal applications","volume":"11","author":"Kerdvibulvech","year":"2024","journal-title":"J. Big Data"},{"key":"10.1016\/j.knosys.2026.116267_b31","series-title":"Findings of the Association for Computational Linguistics: EMNLP 2020","first-page":"2898","article-title":"LEGAL-BERT: The muppets straight out of law school","author":"Chalkidis","year":"2020"},{"key":"10.1016\/j.knosys.2026.116267_b32","series-title":"When Does Pretraining Help? Assessing Self-Supervised Learning for Law and the Casehold Dataset of 53,000+ Legal Holdings","first-page":"159","author":"Zheng","year":"2021"},{"key":"10.1016\/j.knosys.2026.116267_b33","series-title":"Proceedings of the 19th International Conference on Artificial Intelligence and Law","first-page":"187","article-title":"Pre-trained language models for the legal domain: A case study on Indian law","author":"Paul","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b34","series-title":"Proceedings of the 33rd International Conference on Information and Knowledge Management","first-page":"4882","article-title":"LawLLM: Law large language model for the US legal system","author":"Shu","year":"2024"},{"key":"10.1016\/j.knosys.2026.116267_b35","article-title":"ChatGPT: Optimizing language models for dialogue","author":"OpenAI","year":"2022","journal-title":"OpenAI Res."},{"key":"10.1016\/j.knosys.2026.116267_b36","series-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","first-page":"5998","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.knosys.2026.116267_b37","series-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b38","series-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b39","series-title":"Baichuan 2: Open large-scale language models","author":"Yang","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b40","doi-asserted-by":"crossref","unstructured":"Z. Du, Y. Qian, X. Liu, M. Ding, J. Qiu, Z. Yang, J. Tang, GLM: General Language Model Pretraining with Autoregressive Blank Infilling, in: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics, 2022, pp. 320\u2013335.","DOI":"10.18653\/v1\/2022.acl-long.26"},{"key":"10.1016\/j.knosys.2026.116267_b41","series-title":"The 11th International Conference on Learning Representations","article-title":"GLM-130B: An open bilingual pre-trained model","author":"Zeng","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b42","series-title":"ChatLaw: Open-source legal large language model with integrated external knowledge bases","author":"Cui","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b43","series-title":"HanFei-1.0","author":"He","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b44","series-title":"Lexilaw: A scalable legal language model for comprehensive legal understanding","author":"Li","year":"2024"},{"key":"10.1016\/j.knosys.2026.116267_b45","series-title":"XieZhi: Chinese law large language model","author":"Liu","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b46","series-title":"LawGPT: A Chinese legal knowledge-enhanced large language model","author":"Zhou","year":"2024"},{"key":"10.1016\/j.knosys.2026.116267_b47","series-title":"Efficient and effective text encoding for Chinese LLaMA and Alpaca","author":"Cui","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b48","series-title":"ChatLaw","author":"Cui","year":"2023"},{"key":"10.1016\/j.knosys.2026.116267_b49","doi-asserted-by":"crossref","first-page":"10088","DOI":"10.52202\/075280-0441","article-title":"Qlora: Efficient finetuning of quantized llms","volume":"36","author":"Dettmers","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.116267_b50","series-title":"C-pack: Packaged resources to advance general Chinese embedding","author":"Xiao","year":"2023"}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126009937?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126009937?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T12:18:27Z","timestamp":1780921107000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705126009937"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":50,"alternative-id":["S0950705126009937"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116267","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Automated data synthesis and retrieval-augmented generation for legal large language models","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116267","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"116267"}}