{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T09:37:56Z","timestamp":1764841076563,"version":"3.46.0"},"reference-count":16,"publisher":"Oxford University Press (OUP)","issue":"1","license":[{"start":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T00:00:00Z","timestamp":1764806400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/pages\/standard-publication-reuse-rights"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,1,27]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:p>This article investigates the use of machine learning (ML) for classifying legal texts, focusing on the challenges posed by imbalanced class distributions in datasets. In Brazil, where legal processes are numerous and complex, ML could improve judicial efficiency and decision-making speed. However, skewed data distributions make it difficult for standard algorithms to perform well across all classes, often neglecting minority categories. To address this, imbalance learning strategies such as oversampling, undersampling, and hybrid methods are applied to balance data sets and improve classification accuracy. Using real legal data from S\u00e3o Paulo, this study evaluates these techniques\u2019 effectiveness in binary classification, providing valuable insights into their applicability in legal settings.<\/jats:p>","DOI":"10.1093\/jigpal\/jzaf044","type":"journal-article","created":{"date-parts":[[2025,5,16]],"date-time":"2025-05-16T08:02:08Z","timestamp":1747382528000},"source":"Crossref","is-referenced-by-count":0,"title":["Comparative analysis of data sampling techniques for legal text classification in real-world scenarios"],"prefix":"10.1093","volume":"34","author":[{"given":"Daniela L","family":"Freire","sequence":"first","affiliation":[{"name":"University of Sao Paulo , Sao Paulo, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Leandro O","family":"da Silva","sequence":"additional","affiliation":[{"name":"Federal University of Sao Paulo , S\u00e3o Jos\u00e9 dos Campos, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"M\u00e1rcio de","family":"S  Dias","sequence":"additional","affiliation":[{"name":"Federal University of Catal\u00e3o , Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alex M G","family":"de Almeida","sequence":"additional","affiliation":[{"name":"Ourinhos College of Technology , Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adriano","family":"Rivolli","sequence":"additional","affiliation":[{"name":"Federal Technological University of Paran\u00e1 , Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fab\u00edola S F","family":"Pereira","sequence":"additional","affiliation":[{"name":"Federal University of Uberl\u00e2ndia , Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Giliard A","family":"de Godoi","sequence":"additional","affiliation":[{"name":"Federal Technological University of Paran\u00e1 , Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"M\u00e1rcio P","family":"Basgalupp","sequence":"additional","affiliation":[{"name":"Federal University of Sao Paulo , S\u00e3o Jos\u00e9 dos Campos, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andre C P L F","family":"de Carvalho","sequence":"additional","affiliation":[{"name":"University of Sao Paulo, Sao Paulo , Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2025,12,4]]},"reference":[{"key":"2025120404335345800_ref1","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/1007730.1007735","article-title":"A study of the behavior of several methods for balancing machine learning training data","volume":"6","author":"Batista","year":"2004","journal-title":"ACM SIGKDD Explorations Newsletter"},{"key":"2025120404335345800_ref2","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","article-title":"SMOTE: synthetic minority over-sampling technique","volume":"16","author":"Chawla","year":"2002","journal-title":"J Artif Intell Res"},{"key":"2025120404335345800_ref3","first-page":"355","article-title":"Text classification in the Brazilian legal domain","volume-title":"Intern. Conf. on Enterprise Information Systems","author":"Coelho","year":"2022"},{"volume-title":"Justi\u00e7a em n\u00fameros 2022. Justi\u00e7a em n\u00fameros 2022","year":"2022","author":"Conselho Nacional de Justi\u00e7a Departamento de Pesquisas Judici\u00e1rias","key":"2025120404335345800_ref4"},{"volume-title":"Precedentes qualificados 2023","author":"Superior Tribunal de Justi\u00e7a Secretaria de Jurisprud\u00eancia","key":"2025120404335345800_ref5"},{"key":"2025120404335345800_ref6","doi-asserted-by":"crossref","first-page":"2159","DOI":"10.1109\/JSTARS.2019.2922297","article-title":"Dynamic synthetic minority over-sampling technique-based rotation forest for the classification of imbalanced hyperspectral data","volume":"12","author":"Feng","year":"2019","journal-title":"IEEE J Select Topics Appl Earth Observ Remote Sensing"},{"key":"2025120404335345800_ref7","doi-asserted-by":"crossref","first-page":"220","DOI":"10.1016\/j.eswa.2016.12.035","article-title":"Learning from class-imbalanced data: review of methods and applications","volume":"73","author":"Haixiang","year":"2017","journal-title":"Expert Syst Appl"},{"key":"2025120404335345800_ref8","doi-asserted-by":"crossref","first-page":"878","DOI":"10.1007\/11538059_91","article-title":"Borderline-smote: a new over-sampling method in imbalanced data sets learning","volume-title":"Advances in Intelligent Computing: Intern. Conf. on Intelligent Computing","author":"Han","year":"2005"},{"key":"2025120404335345800_ref9","first-page":"1322","article-title":"Adasyn: Adaptive synthetic sampling approach for imbalanced learning","volume-title":"IEEE Intern Joint Conf Neural Network","author":"He","year":"2008"},{"key":"2025120404335345800_ref10","first-page":"769","article-title":"Two modifications of CNN","volume":"6","author":"Ivan","year":"1976","journal-title":"IEEE Trans Syst Man Commun"},{"key":"2025120404335345800_ref11","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2022.116694","article-title":"Obgan: minority oversampling near borderline with generative adversarial networks","volume":"197","author":"Jo","year":"2022","journal-title":"Exp Syst Appl"},{"volume-title":"Imbalanced Learning: Foundations, Algorithms, and Applications","year":"2013","author":"Ma","key":"2025120404335345800_ref12"},{"key":"2025120404335345800_ref13","first-page":"2825","article-title":"Scikit-learn: machine learning in python","volume":"12","author":"Pedregosa","year":"2011","journal-title":"J Machine Learn Res"},{"key":"2025120404335345800_ref14","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1016\/j.ins.2014.08.051","article-title":"SMOTE\u2013IPF: addressing the noisy and borderline examples problem in imbalanced classification by a re-sampling method with filtering","volume":"291","author":"S\u00e1ez","year":"2015","journal-title":"Inform Sci"},{"key":"2025120404335345800_ref15","doi-asserted-by":"crossref","first-page":"513","DOI":"10.1016\/0306-4573(88)90021-0","article-title":"Term-weighting approaches in automatic text retrieval","volume":"24","author":"Salton","year":"1988","journal-title":"Inform Process Manag"},{"key":"2025120404335345800_ref16","doi-asserted-by":"crossref","first-page":"731","DOI":"10.1007\/978-3-540-37256-1_89","article-title":"Under-sampling approaches for improving prediction of the minority class in an imbalanced dataset","volume-title":"Intelligent Control and Automation: Intern. Conf. on Intelligent Computing","author":"Yen","year":"2006"}],"container-title":["Logic Journal of the IGPL"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/jigpal\/article-pdf\/34\/1\/jzaf044\/65735851\/jzaf044.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/jigpal\/article-pdf\/34\/1\/jzaf044\/65735851\/jzaf044.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T09:34:04Z","timestamp":1764840844000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/jigpal\/article\/doi\/10.1093\/jigpal\/jzaf044\/8364718"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,4]]},"references-count":16,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1,27]]}},"URL":"https:\/\/doi.org\/10.1093\/jigpal\/jzaf044","relation":{},"ISSN":["1367-0751","1368-9894"],"issn-type":[{"type":"print","value":"1367-0751"},{"type":"electronic","value":"1368-9894"}],"subject":[],"published-other":{"date-parts":[[2026,2]]},"published":{"date-parts":[[2025,12,4]]},"article-number":"jzaf044"}}