{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T03:47:01Z","timestamp":1768880821042,"version":"3.49.0"},"reference-count":91,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"NSF","award":["1852249"],"award-info":[{"award-number":["1852249"]}]},{"name":"NSA","award":["H98230-20-1-0417"],"award-info":[{"award-number":["H98230-20-1-0417"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Rel."],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1109\/tr.2022.3156126","type":"journal-article","created":{"date-parts":[[2022,3,25]],"date-time":"2022-03-25T19:42:37Z","timestamp":1648237357000},"page":"657-673","source":"Crossref","is-referenced-by-count":24,"title":["Construction and Evaluation of a High-Quality Corpus for Legal Intelligence Using Semiautomated Approaches"],"prefix":"10.1109","volume":"71","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7088-9752","authenticated-orcid":false,"given":"Haihua","family":"Chen","sequence":"first","affiliation":[{"name":"Department of Information Science, University of North Texas, Denton, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lavinia F.","family":"Pieptea","sequence":"additional","affiliation":[{"name":"Department of Mathematics, University of North Texas, Denton, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9394-6748","authenticated-orcid":false,"given":"Junhua","family":"Ding","sequence":"additional","affiliation":[{"name":"Department of Information Science, University of North Texas, Denton, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.466"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1162\/coli_a_00364"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/1568234.1568246"},{"key":"ref4","article-title":"Data quality in the deep learning ERA: Active semi-supervised learning and text normalization for natural language understanding","author":"Lourentzou","year":"2019"},{"key":"ref5","article-title":"CUAD: An expert-annotated NLP dataset for legal contract review","volume-title":"CoRR","volume":"abs\/2103.06268","author":"Hendrycks","year":"2021"},{"issue":"1","key":"ref6","first-page":"13","article-title":"Towards a scienceof corpus annotation: A new methodological challenge for corpus linguistics","volume":"22","author":"Hovy","year":"2010","journal-title":"Int. J. Transl."},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05855-6"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1670"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.3004555"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3386252"},{"key":"ref11","first-page":"2672","article-title":"Generative adversarial Nets","volume-title":"Proc. 27th Int. Con. Neural Inf. Process. Syst.","author":"Goodfellow","year":"2014"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.2307\/2984875"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015350"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-0135-7_48"},{"key":"ref15","first-page":"6694","article-title":"Neural expectation maximization","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Greff","year":"2017"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.191"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.11613\/BM.2012.031"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2953087"},{"issue":"2","key":"ref19","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1023\/A:1007692713085","article-title":"Text classification from labeled and unlabeled documents using EM","volume":"39","author":"Nigam","year":"2000","journal-title":"Mach. Learn."},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TR.2021.3070863"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/766"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10506-010-9104-x"},{"key":"ref23","first-page":"4061","article-title":"Increasing argument annotation reproducibility by using inter-annotator agreement to improve guidelines","volume-title":"Proc. 11th Int. Conf. Lang. Resour. Eval.","author":"Teruel","year":"2018"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/2746090.2746096"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/567"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s10506-019-09242-3"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/1276318.1276362"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s10503-020-09519-x"},{"key":"ref29","first-page":"987","article-title":"Classifying arguments by scheme","volume-title":"Proc. 49th Annu. Meeting Assoc. Comput. Linguistics: Hum. Lang. Technol.","author":"Feng","year":"2011"},{"key":"ref30","first-page":"1","article-title":"Applying argument extraction to improve legal information retrieval","volume-title":"Proc. ArgNLP","author":"Ashley","year":"2014"},{"key":"ref31","first-page":"1802","article-title":"Learning to predict charges for legal judgment via self-attentive capsule network","volume-title":"Proc. Eur. Conf. Artif. Intell.","author":"Le","year":"2020"},{"key":"ref32","article-title":"A survey on text classification: From shallow to deep learning","volume-title":"CoRR","volume":"abs\/2008.00364","author":"Li","year":"2021"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3439726"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/s10506-006-9002-4"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/1066677.1066746"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.17706\/jcp.12.5.451-461"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/2514601.2514622"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.23940\/ijpe.20.01.p13.118129"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.23940\/ijpe.20.06.p16.979990"},{"key":"ref40","first-page":"515","article-title":"A comparative study of classifying legal documents with neural networks","volume-title":"Proc. IEEE Federated Conf. Comput. Sci. Inf. Syst.","author":"Undavia","year":"2018"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1424"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2943668"},{"key":"ref43","article-title":"Large scale legal text classification using transformer models","volume-title":"CoRR","volume":"abs\/2010.12871","author":"Shaheen","year":"2021"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.3390\/ijerph110505170"},{"key":"ref45","first-page":"10 842","article-title":"Data valuation using reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn","author":"Yoon"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2019.07.063"},{"key":"ref47","article-title":"DQI: Measuring data quality in NLP","volume-title":"CoRR","volume":"abs\/2005.00816","author":"Mishra","year":"2021"},{"key":"ref48","article-title":"Efficient knowledge graph accuracy evaluation","volume-title":"CoRR","volume":"abs\/1907.09657","author":"Gao","year":"2021"},{"key":"ref49","article-title":"Data validation for machine learning","volume-title":"Proc. 2nd SysML Conf.","author":"Breck","year":"2019"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/s00296-019-04265-0"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3360901.3364425"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.websem.2018.11.004"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445518"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2017.2680460"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-30760-8_9"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICAIIC51459.2021.9415271"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.3115\/1564131.1564137"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1002\/asi.21652"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.511"},{"key":"ref60","article-title":"Data quality in data mining and machine learning","author":"Hulse","year":"2007"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1504\/IJSSCI.2008.019611"},{"key":"ref62","first-page":"415","article-title":"Addressing the real-world class imbalance problem in dermatology","volume-title":"Proc. Mach. Learn. Health","author":"Weng"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/3343440"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.84"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45372-5_56"},{"key":"ref66","first-page":"1908","article-title":"A pseudo label based dataless naive bayes algorithm for text classification with seed words","volume-title":"Proc. 27th Int. Conf. Comput. Linguistics","author":"Li","year":"2018"},{"key":"ref67","first-page":"896","article-title":"Pseudo-label: The simple and efficient semi-supervised learning method for deep neural networks","volume-title":"Proc. Workshop Challenges Representation Learn.","author":"Lee","year":"2013"},{"key":"ref68","first-page":"2234","article-title":"Improved techniques for training gans","volume":"29","author":"Salimans","year":"2016","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref69","article-title":"Neural semi-supervised learning for text classification under large-scale pretraining","volume-title":"CoRR","volume":"abs\/2011.08626","author":"Sun","year":"2021"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3105013"},{"key":"ref71","first-page":"45","article-title":"Support vector machine active learning with applications to text classification","volume":"2","author":"Tong","year":"2001","journal-title":"J. Mach. Learn. Res."},{"key":"ref72","first-page":"122","article-title":"Automatic labeling of data for transfer learning","volume-title":"Conf. Comput. Vis. Pattern Recognit.","author":"Dube","year":"2019"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-021-02516-x"},{"key":"ref74","article-title":"Universal natural language processing with limited annotations: Try few-shot textual entailment as a start","volume-title":"CoRR","volume":"abs\/2010.02584","author":"Yin","year":"2021"},{"key":"ref75","article-title":"Automatically identifying words that can serve as labels for few-shot text classification","volume-title":"CoRR","volume":"abs\/2010.13641","author":"Schick","year":"2021"},{"key":"ref76","article-title":"Language models are few-shot learners","volume-title":"CoRR","volume":"abs\/2005.14165","author":"Brown","year":"2021"},{"key":"ref77","article-title":"Caselaw Access Project","year":"2020"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1145\/3462757.3466073"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1145\/3462757.3466088"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.4337\/9781788972826.00017"},{"key":"ref81","article-title":"Legal writing: Legal arguments, briefs, and outlines","author":"White","year":"2021"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1145\/3322640.3326723"},{"key":"ref83","first-page":"249","article-title":"Assessing agreement on classification tasks: The kappa statistic","volume-title":"Comput. Linguistics","volume":"22","author":"Carletta","year":"1996"},{"issue":"1","key":"ref84","first-page":"3133","article-title":"Do we need hundreds of classifiers to solve real world classification problems?","volume-title":"J. Mach. Learn. Res.","volume":"15","author":"Fernndez-Delgado","year":"2014"},{"key":"ref85","first-page":"3146","article-title":"Lightgbm: A highly efficient gradient boosting decision tree","volume":"30","author":"Ke","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref86","article-title":"Bert pre-training of deep bidirectional transformers for language understanding","volume-title":"CoRR","volume":"abs\/1810.04805","author":"Devlin","year":"2021"},{"key":"ref87","article-title":"Machine learning model evaluation metrics part 2: Multi-classification","author":"Khalusova","year":"2021"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/2801948.2802011"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2003.1250918"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3017382"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449988"}],"container-title":["IEEE Transactions on Reliability"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/24\/9787285\/9741798-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/24\/9787285\/09741798.pdf?arnumber=9741798","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T00:42:46Z","timestamp":1705538566000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9741798\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6]]},"references-count":91,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tr.2022.3156126","relation":{},"ISSN":["0018-9529","1558-1721"],"issn-type":[{"value":"0018-9529","type":"print"},{"value":"1558-1721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6]]}}}