{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T09:09:34Z","timestamp":1774429774298,"version":"3.50.1"},"reference-count":120,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,9,26]],"date-time":"2024-09-26T00:00:00Z","timestamp":1727308800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,26]],"date-time":"2024-09-26T00:00:00Z","timestamp":1727308800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100002322","name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002322","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003593","name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]},{"name":"C\u00e2mara dos Deputados do Brasil"},{"DOI":"10.13039\/501100001807","name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001807","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Law"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s10506-024-09419-5","type":"journal-article","created":{"date-parts":[[2024,9,26]],"date-time":"2024-09-26T01:02:10Z","timestamp":1727312530000},"page":"1-82","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Segmenting Brazilian legislative text using weak supervision and active learning"],"prefix":"10.1007","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7528-6726","authenticated-orcid":false,"given":"Felipe A.","family":"Siqueira","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Diany","family":"Pressato","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fab\u00edola S. F.","family":"Pereira","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"N\u00e1dia F. F.","family":"da Silva","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ellen","family":"Souza","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"M\u00e1rcio S.","family":"Dias","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andr\u00e9 C. P. L. F.","family":"de Carvalho","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,26]]},"reference":[{"key":"9419_CR1","unstructured":"Afonso S, Bick E, Haber R, et\u00a0al (2002) Floresta sint\u00e1tica: a treebank for portuguese. In: Proceedings of the 3rd International Conference on Language Resources and Evaluation (LREC), Las Palmas, Spain, pp 1698\u20131703"},{"key":"9419_CR2","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-98305-5_1","volume-title":"Computational Processing of the Portuguese Language","author":"HO Albuquerque","year":"2022","unstructured":"Albuquerque HO, Costa R, Silvestre G et al (2022) Ulyssesner-br: a corpus of brazilian legislative documents for named entity recognition. In: Pinheiro V, Gamallo P, Amaro R et al (eds) Computational Processing of the Portuguese Language. Springer International Publishing, Cham, pp 3\u201314"},{"key":"9419_CR3","doi-asserted-by":"publisher","DOI":"10.3390\/app122010559","author":"AM Alshanqiti","year":"2022","unstructured":"Alshanqiti AM, Albouq S, Alkhodre AB et al (2022) Employing a multilingual transformer model for segmenting unpunctuated arabic text. Appl Sci. https:\/\/doi.org\/10.3390\/app122010559","journal-title":"Appl Sci"},{"key":"9419_CR4","first-page":"3884","volume-title":"Advances in Neural Information Processing Systems","author":"J Ash","year":"2020","unstructured":"Ash J, Adams RP (2020) On warm-starting neural network training. In: Larochelle H, Ranzato M, Hadsell R et al (eds) Advances in Neural Information Processing Systems, vol 33. Curran Associates Inc, New York, pp 3884\u20133894"},{"issue":"2","key":"9419_CR5","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1145\/1964897.1964906","volume":"12","author":"J Attenberg","year":"2011","unstructured":"Attenberg J, Provost F (2011) Inactive learning? difficulties employing active learning in practice. SIGKDD Explor Newsl 12(2):36\u201344. https:\/\/doi.org\/10.1145\/1964897.1964906","journal-title":"SIGKDD Explor Newsl"},{"key":"9419_CR6","unstructured":"Baldridge J, Osborne M (2004) Active learning and the total cost of annotation. In: Lin D, Wu D (eds) Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics, Barcelona, Spain, pp 9\u201316. https:\/\/aclanthology.org\/W04-3202"},{"issue":"1","key":"9419_CR7","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1023\/A:1007506220214","volume":"34","author":"D Beeferman","year":"1999","unstructured":"Beeferman D, Berger A, Lafferty J (1999) Statistical models for text segmentation. Mach Learn 34(1):177\u2013210","journal-title":"Mach Learn"},{"key":"9419_CR8","doi-asserted-by":"crossref","unstructured":"Bhattacharya P, Paul S, Ghosh K, et\u00a0al (2019) Identification of Rhetorical Roles of Sentences in Indian Legal Judgments. In: Proceedings of the 32nd International Conference on Legal Knowledge and Information Systems (JURIX)","DOI":"10.3233\/FAIA190301"},{"key":"9419_CR9","volume-title":"Natural language processing with Python: analyzing text with the natural language toolkit","author":"S Bird","year":"2009","unstructured":"Bird S, Klein E, Loper E (2009) Natural language processing with Python: analyzing text with the natural language toolkit. O\u2019Reilly Media Inc, Sevastopol"},{"key":"9419_CR10","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei DM, Ng AY, Jordan MI (2003) Latent Dirichlet allocation. J Mach Learn Res 3:993\u20131022","journal-title":"J Mach Learn Res"},{"key":"9419_CR11","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.3192101","author":"MJ Bommarito","year":"2018","unstructured":"Bommarito MJ, Katz DM, Detterman E (2018) LexNLP: natural language processing and information extraction for legal and regulatory texts. SSRN Electron J. https:\/\/doi.org\/10.2139\/ssrn.3192101","journal-title":"SSRN Electron J"},{"key":"9419_CR12","doi-asserted-by":"publisher","unstructured":"Brantley K, Sharaf A, Daum\u00e9\u00a0III H (2020) Active imitation learning with noisy guidance. In: Jurafsky D, Chai J, Schluter N, et\u00a0al (eds) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Online, pp 2093\u2013210https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.189","DOI":"10.18653\/v1\/2020.acl-main.189"},{"key":"9419_CR13","first-page":"1877","volume-title":"Advances in neural information processing systems","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N et al (2020) Language models are few-shot learners. In: Larochelle H, Ranzato M, Hadsell R et al (eds) Advances in neural information processing systems, vol 33. Curran Associates Inc, New York, pp 1877\u20131901"},{"key":"9419_CR14","doi-asserted-by":"publisher","unstructured":"Cai T, Zhou Y, Zheng H (2020) Cost-quality adaptive active learning for chinese clinical named entity recognition. In: 2020 IEEE International Conference on Bioinformatics and Biomedicine (BIBM), pp 528\u201353. https:\/\/doi.org\/10.1109\/BIBM49941.2020.9313302","DOI":"10.1109\/BIBM49941.2020.9313302"},{"key":"9419_CR15","doi-asserted-by":"crossref","unstructured":"Carlson L, Marcu D, Okurovsky ME (2001) Building a discourse-tagged corpus in the framework of Rhetorical Structure Theory. In: Proceedings of the second SIGdial workshop on discourse and dialogue. https:\/\/aclanthology.org\/W01-1605","DOI":"10.21236\/ADA460581"},{"key":"9419_CR16","unstructured":"Casanova E, Treviso M, H\u00fcbner L, et\u00a0al (2020) Evaluating sentence segmentation in different datasets of neuropsychological language tests in Brazilian Portuguese. In: Calzolari N, B\u00e9chet F, Blache P, et\u00a0al (eds) Proceedings of the Twelfth Language Resources and Evaluation Conference. European Language Resources Association, Marseille, France, pp 2605\u20132614. https:\/\/aclanthology.org\/2020.lrec-1.317"},{"key":"9419_CR17","doi-asserted-by":"publisher","unstructured":"Charikar M, Steinhardt J, Valiant G (2017) Learning from untrusted data. In: Proceedings of the 49th Annual ACM SIGACT Symposium on Theory of Computing. Association for Computing Machinery, New York, NY, USA, STOC 2017, p 47-56. https:\/\/doi.org\/10.1145\/3055399.3055491","DOI":"10.1145\/3055399.3055491"},{"key":"9419_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1162\/tacl_a_00350","volume":"9","author":"A Chaudhary","year":"2021","unstructured":"Chaudhary A, Anastasopoulos A, Sheikh Z et al (2021) Reducing confusion in active learning for part-of-speech tagging. Trans Assoc Comput Linguist 9:1\u20131. https:\/\/doi.org\/10.1162\/tacl_a_00350","journal-title":"Trans Assoc Comput Linguist"},{"key":"9419_CR19","doi-asserted-by":"crossref","unstructured":"Chen H, Branavan S, Barzilay R, et\u00a0al (2009) Global models of document structure using latent permutations. In: Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics. Association for Computational Linguistics, Boulder, Colorado, pp 371\u2013379. https:\/\/aclanthology.org\/N09-1042","DOI":"10.3115\/1620754.1620808"},{"key":"9419_CR20","doi-asserted-by":"crossref","unstructured":"Chiticariu L, Li Y, Reiss FR (2013) Rule-based information extraction is dead! long live rule-based information extraction systems! In: Yarowsky D, Baldwin T, Korhonen A, et\u00a0al (eds) Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics, Seattle, Washington, USA, pp 827\u2013832. https:\/\/aclanthology.org\/D13-1079","DOI":"10.18653\/v1\/D13-1079"},{"key":"9419_CR21","doi-asserted-by":"publisher","unstructured":"Cho S, Song K, Wang X, et\u00a0al (2022) Toward unifying text segmentation and long document summarization. In: Goldberg Y, Kozareva Z, Zhang Y (eds) Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics, Abu Dhabi, United Arab Emirates, pp 106. https:\/\/doi.org\/10.18653\/v1\/2022.emnlp-main.8","DOI":"10.18653\/v1\/2022.emnlp-main.8"},{"key":"9419_CR22","doi-asserted-by":"publisher","unstructured":"Choi E, He H, Iyyer M, et\u00a0al (2018) QuAC: Question answering in context. In: Riloff E, Chiang D, Hockenmaier J, et\u00a0al (eds) Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics, Brussels, Belgium, pp 2174\u2013218. https:\/\/doi.org\/10.18653\/v1\/D18-1241","DOI":"10.18653\/v1\/D18-1241"},{"key":"9419_CR23","unstructured":"Choi FYY (2000) Advances in domain independent linear text segmentation. In: 6th Applied Natural Language Processing Conference, ANLP 2000, Seattle, Washington, USA, April 29 - May 4, 2000. ACL, pp 26\u201333. https:\/\/aclanthology.org\/A00-2004\/"},{"key":"9419_CR24","unstructured":"Choi FYY, Wiemer-Hastings P, Moore J (2001) Latent semantic analysis for text segmentation. In: Proceedings of the 2001 Conference on Empirical Methods in Natural Language Processing. https:\/\/aclanthology.org\/W01-0514"},{"key":"9419_CR25","doi-asserted-by":"publisher","unstructured":"Conneau A, Khandelwal K, Goyal N, et\u00a0al (2020) Unsupervised cross-lingual representation learning at scale. In: Jurafsky D, Chai J, Schluter N, et\u00a0al (eds) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Online, pp 8440\u2013845. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.747","DOI":"10.18653\/v1\/2020.acl-main.747"},{"issue":"3","key":"9419_CR26","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1023\/A:1022627411411","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes C, Vapnik VN (1995) Support-vector networks. Mach Learn 20(3):273\u2013297","journal-title":"Mach Learn"},{"key":"9419_CR27","doi-asserted-by":"publisher","unstructured":"Cotterell R, Heigold G (2017) Cross-lingual character-level neural morphological tagging. In: Palmer M, Hwa R, Riedel S (eds) Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics, Copenhagen, Denmark, pp 748\u2013755. https:\/\/doi.org\/10.18653\/v1\/D17-1078","DOI":"10.18653\/v1\/D17-1078"},{"key":"9419_CR28","doi-asserted-by":"publisher","unstructured":"Deepak, Visweswariah K, Wiratunga N, et\u00a0al (2012) Two-part segmentation of text documents. In: Proceedings of the 21st ACM International Conference on Information and Knowledge Management. Association for Computing Machinery, New York, NY, USA, CIKM \u201912, p 793-800. https:\/\/doi.org\/10.1145\/2396761.2396862","DOI":"10.1145\/2396761.2396862"},{"key":"9419_CR29","doi-asserted-by":"publisher","unstructured":"Devlin J, Chang MW, Lee K, et\u00a0al (2019) BERT: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). Association for Computational Linguistics, Minneapolis, Minnesota, pp 4171\u20134186. https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"9419_CR30","doi-asserted-by":"crossref","unstructured":"Eisenstein J (2009) Hierarchical text segmentation from multi-scale lexical cohesion. In: Ostendorf M, Collins M, Narayanan S, et\u00a0al (eds) Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics. Association for Computational Linguistics, Boulder, Colorado, pp 353\u2013361. https:\/\/aclanthology.org\/N09-1040","DOI":"10.3115\/1620754.1620806"},{"key":"9419_CR31","doi-asserted-by":"publisher","unstructured":"Fadel A, Tuffaha I, Al-Jawarneh B, et\u00a0al (2019) Arabic text diacritization using deep neural networks. In: 2019 2nd International Conference on Computer Applications and Information Security (ICCAIS), pp 1\u20137. https:\/\/doi.org\/10.1109\/CAIS.2019.8769512","DOI":"10.1109\/CAIS.2019.8769512"},{"key":"9419_CR32","doi-asserted-by":"publisher","unstructured":"Falcon W, The PyTorch Lightning team (2019) PyTorch Lightning. https:\/\/doi.org\/10.5281\/zenodo.3828935","DOI":"10.5281\/zenodo.3828935"},{"key":"9419_CR33","doi-asserted-by":"publisher","unstructured":"Fan J (2011) Text segmentation of consumer magazines in pdf format. In: 2011 International Conference on Document Analysis and Recognition, pp 794\u2013798.https:\/\/doi.org\/10.1109\/ICDAR.2011.163","DOI":"10.1109\/ICDAR.2011.163"},{"key":"9419_CR34","doi-asserted-by":"publisher","unstructured":"Feng F, Yang Y, Cer D, et\u00a0al (2022) Language-agnostic BERT sentence embedding. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics, Dublin, Ireland, pp 878\u2013891. https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.62","DOI":"10.18653\/v1\/2022.acl-long.62"},{"key":"9419_CR35","doi-asserted-by":"publisher","unstructured":"Florescu C, Caragea C (2017) PositionRank: An unsupervised approach to keyphrase extraction from scholarly documents. In: Barzilay R, Kan MY (eds) Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics, Vancouver, Canada, pp 1105\u20131115. https:\/\/doi.org\/10.18653\/v1\/P17-1102,","DOI":"10.18653\/v1\/P17-1102"},{"key":"9419_CR36","unstructured":"Fragkou P (2013) Text segmentation for language identification in Greek forums. In: Vertan C, Slavcheva M, Osenova P (eds) Proceedings of the Workshop on Adaptation of Language Resources and Tools for Closely Related Languages and Language Variants. INCOMA Ltd. Shoumen, BULGARIA, Hissar, Bulgaria, pp 23\u201329. https:\/\/aclanthology.org\/W13-5305"},{"issue":"2","key":"9419_CR37","first-page":"23","volume":"12","author":"P Gage","year":"1994","unstructured":"Gage P (1994) A new algorithm for data compression. C Users J 12(2):23\u201338","journal-title":"C Users J"},{"key":"9419_CR38","unstructured":"Gal Y, Islam R, Ghahramani Z (2017) Deep Bayesian active learning with image data. In: Precup D, Teh YW (eds) Proceedings of the 34th International Conference on Machine Learning, Proceedings of Machine Learning Research, vol\u00a070. PMLR, pp 1183\u20131192. https:\/\/proceedings.mlr.press\/v70\/gal17a.html"},{"key":"9419_CR39","doi-asserted-by":"publisher","unstructured":"Glava\u0161 G, Nanni F, Ponzetto SP (2016) Unsupervised text segmentation using semantic relatedness graphs. In: Proceedings of the Fifth Joint Conference on Lexical and Computational Semantics. Association for Computational Linguistics, Berlin, Germany, pp 125\u201313. https:\/\/doi.org\/10.18653\/v1\/S16-2016","DOI":"10.18653\/v1\/S16-2016"},{"key":"9419_CR40","doi-asserted-by":"publisher","unstructured":"Gong H, Shen Y, Yu D, et\u00a0al (2020) Recurrent chunking mechanisms for long-text machine reading comprehension. In: Jurafsky D, Chai J, Schluter N, et\u00a0al (eds) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Online, pp 6751\u20136761. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.603","DOI":"10.18653\/v1\/2020.acl-main.603"},{"issue":"6","key":"9419_CR41","doi-asserted-by":"publisher","first-page":"1789","DOI":"10.1007\/s11263-021-01453-z","volume":"129","author":"J Gou","year":"2021","unstructured":"Gou J, Yu B, Maybank SJ et al (2021) Knowledge distillation: a survey. Int J Comput Vision 129(6):1789\u20131819. https:\/\/doi.org\/10.1007\/s11263-021-01453-z","journal-title":"Int J Comput Vision"},{"issue":"101","key":"9419_CR42","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1016\/j.csl.2021.101261","volume":"71","author":"R Guarasci","year":"2022","unstructured":"Guarasci R, Silvestri S, De Pietro G et al (2022) Bert syntactic transfer: a computational experiment on Italian, French and english languages. Comput Speech Lang 71(101):26. https:\/\/doi.org\/10.1016\/j.csl.2021.101261","journal-title":"Comput Speech Lang"},{"key":"9419_CR43","volume-title":"Cohesion in english","author":"MAK Halliday","year":"1976","unstructured":"Halliday MAK, Hasan R (1976) Cohesion in english. Routledge, England"},{"key":"9419_CR44","doi-asserted-by":"publisher","unstructured":"Hancock B, Varma P, Wang S, et\u00a0al (2018) Training classifiers with natural language explanations. In: Gurevych I, Miyao Y (eds) Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics, Melbourne, Australia, pp 1884\u20131895. https:\/\/doi.org\/10.18653\/v1\/P18-1175","DOI":"10.18653\/v1\/P18-1175"},{"issue":"1","key":"9419_CR45","first-page":"33","volume":"23","author":"MA Hearst","year":"1997","unstructured":"Hearst MA (1997) TextTiling: segmenting text into multi-paragraph subtopic passages. Comput Linguist 23(1):33\u201364","journal-title":"Comput Linguist"},{"key":"9419_CR46","unstructured":"Hendrycks D, Mazeika M, Wilson D, et\u00a0al (2018) Using trusted data to train deep networks on labels corrupted by severe noise. In: Bengio S, Wallach H, Larochelle H, et\u00a0al (eds) Advances in Neural Information Processing Systems, vol\u00a031. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2018\/file\/ad554d8c3b06d6b97ee76a2448bd7913-Paper.pdf"},{"issue":"8","key":"9419_CR47","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780. https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","journal-title":"Neural Comput"},{"key":"9419_CR48","unstructured":"Houlsby N, Husz\u00e1r F, Ghahramani Z, et\u00a0al (2011) Bayesian active learning for classification and preference learning. arXiv:1112.5745"},{"key":"9419_CR49","unstructured":"Huang Sj, Jin R, Zhou ZH (2010) Active learning by querying informative and representative examples. In: Lafferty J, Williams C, Shawe-Taylor J, et\u00a0al (eds) Advances in Neural Information Processing Systems, vol\u00a023. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2010\/file\/5487315b1286f907165907aa8fc96619-Paper.pdf"},{"issue":"2","key":"9419_CR50","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1007\/s10994-021-05946-3","volume":"110","author":"E H\u00fcllermeier","year":"2021","unstructured":"H\u00fcllermeier E, Waegeman W (2021) Aleatoric and epistemic uncertainty in machine learning: an introduction to concepts and methods. Mach Learn 110(2):457\u2013506. https:\/\/doi.org\/10.1007\/s10994-021-05946-3","journal-title":"Mach Learn"},{"key":"9419_CR51","doi-asserted-by":"publisher","unstructured":"Jayakumar T, Farooqui F, Farooqui L (2023) Large language models are legal but they are not: Making the case for a powerful LegalLLM. In: Preotiuc-Pietro D, Goanta C, Chalkidis I, et\u00a0al (eds) Proceedings of the Natural Legal Language Processing Workshop 2023. Association for Computational Linguistics, Singapore, pp 223\u2013229. https:\/\/doi.org\/10.18653\/v1\/2023.nllp-1.22,","DOI":"10.18653\/v1\/2023.nllp-1.22"},{"key":"9419_CR52","doi-asserted-by":"publisher","unstructured":"Joshi M, Choi E, Weld D, et\u00a0al (2017) TriviaQA: A large scale distantly supervised challenge dataset for reading comprehension. In: Barzilay R, Kan MY (eds) Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics, Vancouver, Canada, pp 1601\u20131611. https:\/\/doi.org\/10.18653\/v1\/P17-1147","DOI":"10.18653\/v1\/P17-1147"},{"issue":"1","key":"9419_CR53","doi-asserted-by":"publisher","first-page":"211","DOI":"10.3390\/ai3010013","volume":"3","author":"D Kartchner","year":"2022","unstructured":"Kartchner D, Nakajima An D, Ren W et al (2022) Rule-enhanced active learning for semi-automated weak supervision. AI 3(1):211\u2013228. https:\/\/doi.org\/10.3390\/ai3010013","journal-title":"AI"},{"key":"9419_CR54","unstructured":"Kingma D, Ba J (2015) Adam: A method for stochastic optimization. In: International Conference on Learning Representations (ICLR), San Diega, CA, USA"},{"key":"9419_CR55","doi-asserted-by":"publisher","unstructured":"Koshorek O, Cohen A, Mor N, et\u00a0al (2018) Text segmentation as a supervised learning task. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 2 (Short Papers). Association for Computational Linguistics, New Orleans, Louisiana, pp 469\u2013473. https:\/\/doi.org\/10.18653\/v1\/N18-2075","DOI":"10.18653\/v1\/N18-2075"},{"key":"9419_CR56","unstructured":"Lafferty JD, McCallum A, Pereira FCN (2001) Conditional random fields: probabilistic models for segmenting and labeling sequence data. In: Proceedings of the Eighteenth International Conference on Machine Learning. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, ICML \u201901, pp 282-289"},{"key":"9419_CR57","doi-asserted-by":"publisher","unstructured":"Lai S, Xu L, Liu K, et\u00a0al (2015) Recurrent convolutional neural networks for text classification. In: Proceedings of the AAAI Conference on Artificial Intelligence. https:\/\/doi.org\/10.1609\/aaai.v29i1.9513","DOI":"10.1609\/aaai.v29i1.9513"},{"key":"9419_CR58","doi-asserted-by":"publisher","unstructured":"Lewis DD, Catlett J (1994) Heterogeneous uncertainty sampling for supervised learning. In: Cohen WW, Hirsh H (eds) Machine Learning Proceedings 1994. Morgan Kaufmann, San Francisco (CA), pp 148\u2013156. https:\/\/doi.org\/10.1016\/B978-1-55860-335-6.50026-X,","DOI":"10.1016\/B978-1-55860-335-6.50026-X"},{"key":"9419_CR59","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-1-4471-2099-5_1","volume-title":"SIGIR \u201994","author":"DD Lewis","year":"1994","unstructured":"Lewis DD, Gale WA (1994) A sequential algorithm for training text classifiers. In: Croft BW, van Rijsbergen CJ (eds) SIGIR \u201994. Springer, London, pp 3\u201312"},{"key":"9419_CR60","unstructured":"Li S, Zhou G, Huang CR (2012) Active learning for Chinese word segmentation. In: Kay M, Boitet C (eds) Proceedings of COLING 2012: Posters. The COLING 2012 Organizing Committee, Mumbai, India, pp 683\u2013692. https:\/\/aclanthology.org\/C12-2067"},{"key":"9419_CR61","doi-asserted-by":"publisher","unstructured":"Lin H, Lu Y, Han X, et\u00a0al (2019) Gazetteer-enhanced attentive neural networks for named entity recognition. In: Inui K, Jiang J, Ng V, et\u00a0al (eds) Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics, Hong Kong, China, pp 6232\u20136237. https:\/\/doi.org\/10.18653\/v1\/D19-1646,","DOI":"10.18653\/v1\/D19-1646"},{"issue":"1\u20133","key":"9419_CR62","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1007\/BF01589116","volume":"45","author":"DC Liu","year":"1989","unstructured":"Liu DC, Nocedal J (1989) Limited memory BFGS method for large scale optimization. Math program 45(1\u20133):503\u2013528","journal-title":"Math program"},{"key":"9419_CR63","unstructured":"Liu X, Yu HF, Dhillon I, et\u00a0al (2020a) Learning to encode position for transformer with continuous dynamical model. In: III HD, Singh A (eds) Proceedings of the 37th International Conference on Machine Learning, Proceedings of Machine Learning Research, vol 119. PMLR, pp 6327\u20136335. https:\/\/proceedings.mlr.press\/v119\/liu20n.html"},{"key":"9419_CR64","unstructured":"Liu Y, Ott M, Goyal N, et\u00a0al (2020b) Roberta: a robustly optimized bert pretraining approach. https:\/\/openreview.net\/forum?id=SyxS0T4tvS"},{"key":"9419_CR65","doi-asserted-by":"publisher","unstructured":"Lowell D, Lipton ZC, Wallace BC (2019) Practical obstacles to deploying active learning. In: Inui K, Jiang J, Ng V, et\u00a0al (eds) Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics, Hong Kong, China, pp 21\u201330. https:\/\/doi.org\/10.18653\/v1\/D19-1003","DOI":"10.18653\/v1\/D19-1003"},{"key":"9419_CR66","doi-asserted-by":"publisher","unstructured":"Lukasik M, Dadachev B, Papineni K, et\u00a0al (2020) Text segmentation by cross segment attention. In: Webber B, Cohn T, He Y, et\u00a0al (eds) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP). Association for Computational Linguistics, Online, pp 4707\u20134716. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.380,","DOI":"10.18653\/v1\/2020.emnlp-main.380"},{"key":"9419_CR67","doi-asserted-by":"publisher","unstructured":"Luo B, Feng Y, Wang Z, et\u00a0al (2018) Marrying up regular expressions with neural networks: A case study for spoken language understanding. In: Gurevych I, Miyao Y (eds) Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics, Melbourne, Australia, pp 2083\u20132093. https:\/\/doi.org\/10.18653\/v1\/P18-1194","DOI":"10.18653\/v1\/P18-1194"},{"issue":"20","key":"9419_CR68","first-page":"589","volume":"6","author":"T Luo","year":"2005","unstructured":"Luo T, Kramer K, Goldgof DB et al (2005) Active learning to recognize multiple types of plankton. J Mach Learn Res 6(20):589\u2013613","journal-title":"J Mach Learn Res"},{"issue":"4","key":"9419_CR69","doi-asserted-by":"publisher","first-page":"590","DOI":"10.1162\/neco.1992.4.4.590","volume":"4","author":"DJC MacKay","year":"1992","unstructured":"MacKay DJC (1992) Information-based objective functions for active data selection. Neural Comput 4(4):590\u2013604. https:\/\/doi.org\/10.1162\/neco.1992.4.4.590","journal-title":"Neural Comput"},{"key":"9419_CR70","doi-asserted-by":"crossref","unstructured":"Manning CD, Surdeanu M, Bauer J, et\u00a0al (2014) The Stanford CoreNLP natural language processing toolkit. In: Association for Computational Linguistics (ACL) System Demonstrations, pp 55\u201360. http:\/\/www.aclweb.org\/anthology\/P\/P14\/P14-5010","DOI":"10.3115\/v1\/P14-5010"},{"key":"9419_CR71","unstructured":"Mikolov T, Chen K, Corrado G, et\u00a0al (2013) Efficient estimation of word representations in vector space. In: Proceedings of the International Conference on Learning Representations (ICLR)"},{"key":"9419_CR72","doi-asserted-by":"publisher","unstructured":"Moniz JRA, Patra B, Gormley M (2022) On efficiently acquiring annotations for multilingual models. In: Muresan S, Nakov P, Villavicencio A (eds) Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers). Association for Computational Linguistics, Dublin, Ireland, pp 69\u201385. https:\/\/doi.org\/10.18653\/v1\/2022.acl-short.9,","DOI":"10.18653\/v1\/2022.acl-short.9"},{"issue":"1","key":"9419_CR73","first-page":"21","volume":"17","author":"J Morris","year":"1991","unstructured":"Morris J, Hirst G (1991) Lexical cohesion computed by thesaural relations as an indicator of the structure of text. Comput Linguist 17(1):21\u201348","journal-title":"Comput Linguist"},{"key":"9419_CR74","unstructured":"Myers S, Palmer M (2021) Tuning deep active learning for semantic role labeling. In: Zarrie\u00df S, Bos J, van Noord R, et\u00a0al (eds) Proceedings of the 14th International Conference on Computational Semantics (IWCS). Association for Computational Linguistics, Groningen, The Netherlands (online), pp 212\u2013221. https:\/\/aclanthology.org\/2021.iwcs-1.20"},{"key":"9419_CR75","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1007\/978-3-030-33778-0_7","volume-title":"Discovery science","author":"VL Nguyen","year":"2019","unstructured":"Nguyen VL, Destercke S, H\u00fcllermeier E (2019) Epistemic uncertainty sampling. In: Kralj Novak P, \u0160muc T, D\u017eeroski S (eds) Discovery science. Springer International Publishing, Cham, pp 72\u201386"},{"issue":"05","key":"9419_CR76","doi-asserted-by":"publisher","first-page":"8536","DOI":"10.1609\/aaai.v34i05.6375","volume":"34","author":"A Ni","year":"2020","unstructured":"Ni A, Yin P, Neubig G (2020) Merging weak and active supervision for semantic parsing. Proc AAAI Conf Artif Intell 34(05):8536\u2013854. https:\/\/doi.org\/10.1609\/aaai.v34i05.6375","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"9419_CR77","doi-asserted-by":"publisher","unstructured":"Nodet P, Lemaire V, Bondu A, et\u00a0al (2021) From weakly supervised learning to biquality learning: an introduction. In: 2021 International Joint Conference on Neural Networks (IJCNN), pp 1\u201310. https:\/\/doi.org\/10.1109\/IJCNN52387.2021.9533353","DOI":"10.1109\/IJCNN52387.2021.9533353"},{"key":"9419_CR78","first-page":"167","volume-title":"Text segmentation techniques: a critical review","author":"I Pak","year":"2018","unstructured":"Pak I, Teh PL (2018) Text segmentation techniques: a critical review. Springer International Publishing, Cham, pp 167\u2013181"},{"key":"9419_CR79","first-page":"8024","volume":"32","author":"A Paszke","year":"2019","unstructured":"Paszke A, Gross S, Massa F et al (2019) Pytorch: an imperative style, high-performance deep learning library. Adv Neural Inf Process Syst 32:8024\u20138035","journal-title":"Adv Neural Inf Process Syst"},{"key":"9419_CR80","first-page":"309","volume-title":"Computational linguistics and intelligent text processing","author":"S Patil","year":"2015","unstructured":"Patil S, Ravindran B (2015) Active learning based weak supervision for textual survey response classification. In: Gelbukh A (ed) Computational linguistics and intelligent text processing. Springer International Publishing, Cham, pp 309\u2013320"},{"issue":"1","key":"9419_CR81","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1162\/089120102317341756","volume":"28","author":"L Pevzner","year":"2002","unstructured":"Pevzner L, Hearst MA (2002) A critique and improvement of an evaluation metric for text segmentation. Comput Linguist 28(1):19\u201336. https:\/\/doi.org\/10.1162\/089120102317341756","journal-title":"Comput Linguist"},{"key":"9419_CR82","doi-asserted-by":"publisher","unstructured":"Pires T, Schlinger E, Garrette D (2019) How multilingual is multilingual BERT? In: Korhonen A, Traum D, M\u00e0rquez L (eds) Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Florence, Italy, pp 4996\u20135001. https:\/\/doi.org\/10.18653\/v1\/P19-1493","DOI":"10.18653\/v1\/P19-1493"},{"key":"9419_CR83","volume-title":"Head-driven phrase structure grammar","author":"C Pollard","year":"1994","unstructured":"Pollard C, Sag IA (1994) Head-driven phrase structure grammar, 1st edn. University of Chicago Press, Chicago","edition":"1"},{"key":"9419_CR84","unstructured":"Press O, Smith N, Lewis M (2022) Train short, test long: Attention with linear biases enables input length extrapolation. In: International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=R8sQPpGCv0"},{"key":"9419_CR85","doi-asserted-by":"publisher","unstructured":"Qian K, Chozhiyath\u00a0Raman P, Li Y, et\u00a0al (2020) Learning structured representations of entity names using Active Learning and weak supervision. In: Webber B, Cohn T, He Y, et\u00a0al (eds) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP). Association for Computational Linguistics, Online, pp 6376\u20136383. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.517","DOI":"10.18653\/v1\/2020.emnlp-main.517"},{"key":"9419_CR86","doi-asserted-by":"crossref","unstructured":"Ratinov L, Roth D (2009) Design challenges and misconceptions in named entity recognition. In: Stevenson S, Carreras X (eds) Proceedings of the Thirteenth Conference on Computational Natural Language Learning (CoNLL-2009). Association for Computational Linguistics, Boulder, Colorado, pp 147\u2013155. https:\/\/aclanthology.org\/W09-1119","DOI":"10.3115\/1596374.1596399"},{"key":"9419_CR87","doi-asserted-by":"publisher","unstructured":"Ratner AJ, Bach SH, Ehrenberg HR, et\u00a0al (2017) Snorkel: Fast training set generation for information extraction. In: Proceedings of the 2017 ACM International Conference on Management of Data. Association for Computing Machinery, New York, NY, USA, SIGMOD \u201917, pp 1683-1686. https:\/\/doi.org\/10.1145\/3035918.3056442,","DOI":"10.1145\/3035918.3056442"},{"key":"9419_CR88","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1162\/tacl_a_00266","volume":"7","author":"S Reddy","year":"2019","unstructured":"Reddy S, Chen D, Manning CD (2019) CoQA: a conversational question answering challenge. Trans Assoc Comput Linguist 7:249\u2013266. https:\/\/doi.org\/10.1162\/tacl_a_00266","journal-title":"Trans Assoc Comput Linguist"},{"key":"9419_CR89","doi-asserted-by":"publisher","unstructured":"Reynar JC (1994) An automatic method of finding topic boundaries. In: 32nd Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Las Cruces, New Mexico, USA, pp 331\u2013333. https:\/\/doi.org\/10.3115\/981732.981783","DOI":"10.3115\/981732.981783"},{"key":"9419_CR90","unstructured":"Riedl M, Biemann C (2012a) How text segmentation algorithms gain from topic models. In: Fosler-Lussier E, Riloff E, Bangalore S (eds) Proceedings of the 2012 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. Association for Computational Linguistics, Montr\u00e9al, Canada, pp 553\u2013557. https:\/\/aclanthology.org\/N12-1064"},{"key":"9419_CR91","unstructured":"Riedl M, Biemann C (2012b) Topictiling: A text segmentation algorithm based on lda. In: Proceedings of ACL 2012 Student Research Workshop. Association for Computational Linguistics, USA, ACL \u201912, p 37-42"},{"key":"9419_CR92","unstructured":"Rino LHM, Di Felippo A, Pardo TAS (2013) Insights for better rst segmentation of texts in portuguese. Anais do IV Workshop \u2018A RST e os Estudos do Texto.\u201d Sociedade Brasileira de Computa\u00e7\u00e3o, Fortaleza, CE, Brasil, pp 30\u201339"},{"key":"9419_CR93","unstructured":"Roy N, McCallum A (2001) Toward optimal active learning through sampling estimation of error reduction. In: Proceedings of the Eighteenth International Conference on Machine Learning. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, ICML \u201901, pp 441-448"},{"key":"9419_CR94","doi-asserted-by":"publisher","unstructured":"Sanchez G (2019) Sentence boundary detection in legal text. In: Proceedings of the Natural Legal Language Processing Workshop 2019. Association for Computational Linguistics, Minneapolis, Minnesota, pp 31\u201338. https:\/\/doi.org\/10.18653\/v1\/W19-2204","DOI":"10.18653\/v1\/W19-2204"},{"key":"9419_CR95","first-page":"21","volume":"58","author":"J Savelka","year":"2017","unstructured":"Savelka J, Walker VR, Grabmair M et al (2017) Sentence boundary detection in adjudicatory decisions in the united states. Traitement Automatique des Langues 58:21","journal-title":"Traitement Automatique des Langues"},{"key":"9419_CR96","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1007\/3-540-44816-0_31","volume-title":"Advances in intelligent data analysis","author":"T Scheffer","year":"2001","unstructured":"Scheffer T, Decomain C, Wrobel S (2001) Active hidden markov models for information extraction. In: Hoffmann F, Hand DJ, Adams N et al (eds) Advances in intelligent data analysis. Springer, Berlin, pp 309\u2013318"},{"issue":"3","key":"9419_CR97","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1007\/s10994-007-5019-5","volume":"68","author":"AI Schein","year":"2007","unstructured":"Schein AI, Ungar LH (2007) Active learning for logistic regression: an evaluation. Mach Learn 68(3):235\u2013265. https:\/\/doi.org\/10.1007\/s10994-007-5019-5","journal-title":"Mach Learn"},{"key":"9419_CR98","doi-asserted-by":"publisher","unstructured":"Schr\u00f6der C, Niekler A, Potthast M (2022) Revisiting uncertainty-based query strategies for active learning with transformers. In: Muresan S, Nakov P, Villavicencio A (eds) Findings of the Association for Computational Linguistics: ACL 2022. Association for Computational Linguistics, Dublin, Ireland, pp 2194\u20132203. https:\/\/doi.org\/10.18653\/v1\/2022.findings-acl.172","DOI":"10.18653\/v1\/2022.findings-acl.172"},{"key":"9419_CR99","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1016\/j.ins.2013.07.030","volume":"255","author":"R Senge","year":"2014","unstructured":"Senge R, B\u00f6sner S, Dembczy\u0144ski K et al (2014) Reliable classification: learning classifiers that distinguish aleatoric and epistemic uncertainty. Inf Sci 255:16\u201329. https:\/\/doi.org\/10.1016\/j.ins.2013.07.030","journal-title":"Inf Sci"},{"key":"9419_CR100","doi-asserted-by":"publisher","unstructured":"Seung HS, Opper M, Sompolinsky H (1992) Query by committee. In: Proceedings of the Fifth Annual Workshop on Computational Learning Theory. Association for Computing Machinery, New York, NY, USA, COLT \u201992, pp 287-294. https:\/\/doi.org\/10.1145\/130385.130417","DOI":"10.1145\/130385.130417"},{"key":"9419_CR101","first-page":"48","volume":"2645","author":"D Seyler","year":"2020","unstructured":"Seyler D, Bruin P, Bayyapu P et al (2020) Finding contextually consistent information units in legal text. CEUR Workshop Proc 2645:48\u201351","journal-title":"CEUR Workshop Proc"},{"key":"9419_CR102","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1002\/j.1538-7305.1948.tb01338.x","volume":"27","author":"CE Shannon","year":"1948","unstructured":"Shannon CE (1948) A mathematical theory of communication. Bell Syst Technic J 27:379\u2013423","journal-title":"Bell Syst Technic J"},{"issue":"1","key":"9419_CR103","doi-asserted-by":"publisher","first-page":"164","DOI":"10.1007\/s10618-016-0460-3","volume":"31","author":"M Sharma","year":"2017","unstructured":"Sharma M, Bilgic M (2017) Evidence-based uncertainty sampling for active learning. Data Min Knowl Discov 31(1):164\u2013202. https:\/\/doi.org\/10.1007\/s10618-016-0460-3","journal-title":"Data Min Knowl Discov"},{"key":"9419_CR104","doi-asserted-by":"publisher","unstructured":"Shelmanov A, Puzyrev D, Kupriyanova L, et\u00a0al (2021) Active learning for sequence tagging with deep pre-trained models and Bayesian uncertainty estimates. In: Merlo P, Tiedemann J, Tsarfaty R (eds) Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume. Association for Computational Linguistics, Online, pp 1698\u20131712. https:\/\/doi.org\/10.18653\/v1\/2021.eacl-main.145","DOI":"10.18653\/v1\/2021.eacl-main.145"},{"issue":"1","key":"9419_CR105","doi-asserted-by":"publisher","first-page":"72","DOI":"10.2307\/1412159","volume":"15","author":"C Spearman","year":"1904","unstructured":"Spearman C (1904) The proof and measurement of association between two things. Am J Psychol 15(1):72\u2013101","journal-title":"Am J Psychol"},{"key":"9419_CR106","doi-asserted-by":"publisher","unstructured":"Tang M, Luo X, Roukos S (2002) Active learning for statistical natural language parsing. In: Isabelle P, Charniak E, Lin D (eds) Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Philadelphia, Pennsylvania, USA, pp 120\u2013127. https:\/\/doi.org\/10.3115\/1073083.1073105","DOI":"10.3115\/1073083.1073105"},{"issue":"4","key":"9419_CR107","first-page":"415","volume":"30","author":"WL Taylor","year":"1953","unstructured":"Taylor WL (1953) Cloze procedure: a new tool for measuring readability. J Q 30(4):415\u2013433","journal-title":"J Q"},{"key":"9419_CR108","unstructured":"Tomanek K, Morik K (2011) Inspecting sample reusability for active learning. In: Guyon I, Cawley G, Dror G, et\u00a0al (eds) Active Learning and Experimental Design workshop In conjunction with AISTATS 2010, Proceedings of Machine Learning Research, vol\u00a016. PMLR, Sardinia, Italy, pp 169\u2013181. https:\/\/proceedings.mlr.press\/v16\/tomanek11a.html"},{"key":"9419_CR109","doi-asserted-by":"publisher","unstructured":"Utiyama M, Isahara H (2001) A statistical model for domain-independent text segmentation. In: Proceedings of the 39th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Toulouse, France, pp 499\u2013506. https:\/\/doi.org\/10.3115\/1073012.1073076","DOI":"10.3115\/1073012.1073076"},{"key":"9419_CR110","volume-title":"Advances in neural information processing systems","author":"A Vaswani","year":"2017","unstructured":"Vaswani A, Shazeer N, Parmar N et al (2017) Attention is all you need. In: Guyon I, Luxburg UV, Bengio S et al (eds) Advances in neural information processing systems, vol 30. Curran Associates Inc, New York"},{"key":"9419_CR111","unstructured":"Waltl B, Bonczek G, Matthes F (2018) Rule-based information extraction: Advantages, limitations, and perspectives. Jusletter IT (02 2018) 4"},{"issue":"1\u20132","key":"9419_CR112","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1093\/biomet\/34.1-2.28","volume":"34","author":"BL Welch","year":"1947","unstructured":"Welch BL (1947) The generalization of \u2018student\u2019s\u2019 problem when several different population variances are involved. Biometrika 34(1\u20132):28\u201335. https:\/\/doi.org\/10.1093\/biomet\/34.1-2.28","journal-title":"Biometrika"},{"issue":"6","key":"9419_CR113","doi-asserted-by":"publisher","first-page":"80","DOI":"10.2307\/3001968","volume":"1","author":"F Wilcoxon","year":"1945","unstructured":"Wilcoxon F (1945) Individual comparisons by ranking methods. Biometrics Bull 1(6):80\u201383","journal-title":"Biometrics Bull"},{"key":"9419_CR114","doi-asserted-by":"crossref","unstructured":"Wolf T, Debut L, Sanh V, et\u00a0al (2020) Transformers: State-of-the-art natural language processing. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations pp 38\u201345","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"9419_CR115","doi-asserted-by":"publisher","unstructured":"Yang J, Wei F, Huber-Fliflet N, et\u00a0al (2023) An empirical analysis of text segmentation for bert classification in extended documents. In: 2023 IEEE International Conference on Big Data (BigData). IEEE Computer Society, Los Alamitos, CA, USA, pp 2793\u20132797. https:\/\/doi.org\/10.1109\/BigData59044.2023.10386783","DOI":"10.1109\/BigData59044.2023.10386783"},{"key":"9419_CR116","doi-asserted-by":"publisher","unstructured":"Yuan M, Lin HT, Boyd-Graber J (2020) Cold-start active learning through self-supervised language modeling. In: Webber B, Cohn T, He Y, et\u00a0al (eds) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP). Association for Computational Linguistics, Online, pp 7935\u20137948. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.637","DOI":"10.18653\/v1\/2020.emnlp-main.637"},{"key":"9419_CR117","volume-title":"Advances in neural information processing systems","author":"C Zhang","year":"2015","unstructured":"Zhang C, Chaudhuri K (2015) Active learning from weak and strong labelers. In: Cortes C, Lawrence N, Lee D et al (eds) Advances in neural information processing systems, vol 28. Curran Associates Inc, New York"},{"key":"9419_CR118","doi-asserted-by":"publisher","unstructured":"Zhang R, Yu Y, Shetty P, et\u00a0al (2022a) Prompt-based rule discovery and boosting for interactive weakly-supervised learning. In: Muresan S, Nakov P, Villavicencio A (eds) Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics, Dublin, Ireland, pp 745\u2013758. https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.55","DOI":"10.18653\/v1\/2022.acl-long.55"},{"key":"9419_CR119","doi-asserted-by":"publisher","unstructured":"Zhang Z, Strubell E, Hovy E (2022b) A survey of active learning for natural language processing. In: Goldberg Y, Kozareva Z, Zhang Y (eds) Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics, Abu Dhabi, United Arab Emirates, pp 6166\u20136190. https:\/\/doi.org\/10.18653\/v1\/2022.emnlp-main.414","DOI":"10.18653\/v1\/2022.emnlp-main.414"},{"issue":"1","key":"9419_CR120","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1093\/nsr\/nwx106","volume":"5","author":"ZH Zhou","year":"2017","unstructured":"Zhou ZH (2017) A brief introduction to weakly supervised learning. Natl Sci Rev 5(1):44\u201353. https:\/\/doi.org\/10.1093\/nsr\/nwx106","journal-title":"Natl Sci Rev"}],"container-title":["Artificial Intelligence and Law"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10506-024-09419-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10506-024-09419-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10506-024-09419-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T06:45:29Z","timestamp":1774421129000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10506-024-09419-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,26]]},"references-count":120,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["9419"],"URL":"https:\/\/doi.org\/10.1007\/s10506-024-09419-5","relation":{},"ISSN":["0924-8463","1572-8382"],"issn-type":[{"value":"0924-8463","type":"print"},{"value":"1572-8382","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,26]]},"assertion":[{"value":"22 August 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 September 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}