{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:06:16Z","timestamp":1757617576713,"version":"3.44.0"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,2,23]],"date-time":"2025-02-23T00:00:00Z","timestamp":1740268800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,23]],"date-time":"2025-02-23T00:00:00Z","timestamp":1740268800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Evolving Systems"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s12530-025-09671-3","type":"journal-article","created":{"date-parts":[[2025,2,23]],"date-time":"2025-02-23T11:47:11Z","timestamp":1740311231000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Adaptive data augmentation for salient sentence identification in Indian judicial decisions"],"prefix":"10.1007","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3567-9757","authenticated-orcid":false,"given":"Reshma","family":"Sheik","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Krishnadas","family":"Nair","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"S. K. Manu","family":"Krishna","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"S. Jaya","family":"Nirmala","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,23]]},"reference":[{"key":"9671_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.118318","volume":"209","author":"S Ahmad","year":"2022","unstructured":"Ahmad S, Asghar MZ, Alotaibi FM, Al-Otaibi YD (2022) A hybrid cnn+ bilstm deep learning-based dss for efficient prediction of judicial case decisions. Expert Syst Appl 209:118318","journal-title":"Expert Syst Appl"},{"issue":"1","key":"9671_CR2","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1186\/s40537-023-00727-2","volume":"10","author":"L Alzubaidi","year":"2023","unstructured":"Alzubaidi L, Bai J, Al-Sabaawi A, Santamar\u00eda J, Albahri A, Al-dabbagh BSN, Fadhel MA, Manoufali M, Zhang J, Al-Timemy AH et al (2023) A survey on deep learning tools dealing with data scarcity: definitions, challenges, solutions, tips, and applications. J Big Data 10(1):46","journal-title":"J Big Data"},{"key":"9671_CR3","doi-asserted-by":"crossref","unstructured":"Anaby-Tavor A, Carmeli B, Goldbraich E, Kantor A, Kour G, Shlomov S, Tepper N, Zwerdling N (2020) Do not have enough data? deep learning to the rescue! In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 7383\u20137390","DOI":"10.1609\/aaai.v34i05.6233"},{"key":"9671_CR4","doi-asserted-by":"crossref","unstructured":"Bayer M, Kaufhold M-A, Reuter C (2021) A survey on data augmentation for text classification. ACM Computing Surveys","DOI":"10.1145\/3544558"},{"key":"9671_CR5","doi-asserted-by":"crossref","unstructured":"Bhattacharya P, Paul S, Ghosh K, Ghosh S, Wyner A (2021) Deeprhole: deep learning for rhetorical role labeling of sentences in legal case documents. Artificial Intelligence and Law, 1\u201338","DOI":"10.1007\/s10506-021-09304-5"},{"key":"9671_CR6","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N, Subbiah M, Kaplan JD, Dhariwal P, Neelakantan A, Shyam P, Sastry G, Askell A et al (2020) Language models are few-shot learners. Adv Neural Inf Process Syst 33:1877\u20131901","journal-title":"Adv Neural Inf Process Syst"},{"key":"9671_CR7","doi-asserted-by":"crossref","unstructured":"Chalkidis I, Fergadiotis M, Malakasiotis P, Aletras N, Androutsopoulos I (2020) Legal-bert: The muppets straight out of law school. arXiv preprint arXiv:2010.02559","DOI":"10.18653\/v1\/2020.findings-emnlp.261"},{"issue":"2","key":"9671_CR8","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1016\/j.ipm.2010.07.003","volume":"47","author":"E Chen","year":"2011","unstructured":"Chen E, Lin Y, Xiong H, Luo Q, Ma H (2011) Exploiting probabilistic topic models to improve text categorization under class imbalance. Information Processing & Management 47(2):202\u2013214","journal-title":"Information Processing & Management"},{"key":"9671_CR9","doi-asserted-by":"crossref","unstructured":"Chen Z, Eavani H, Chen W, Liu Y, Wang WY (2019) Few-shot nlg with pre-trained language model. arXiv preprint arXiv:1904.09521","DOI":"10.18653\/v1\/2020.acl-main.18"},{"key":"9671_CR10","unstructured":"Chen Y, Liu Y (2022) Rethinking data augmentation in text-to-text paradigm. In: Proceedings of the 29th International Conference on Computational Linguistics, pp. 1157\u20131162. International Committee on Computational Linguistics, Gyeongju, Republic of Korea"},{"issue":"1","key":"9671_CR11","doi-asserted-by":"publisher","first-page":"15","DOI":"10.14513\/actatechjaur.00628","volume":"15","author":"G Cs\u00e1nyi","year":"2022","unstructured":"Cs\u00e1nyi G, Orosz T (2022) Comparison of data augmentation methods for legal document classification. Acta Technica Jaurinensis 15(1):15\u201321","journal-title":"Acta Technica Jaurinensis"},{"key":"9671_CR12","unstructured":"Edwards A, Ushio A, Camacho-Collados J, Ribaupierre H, Preece A (2021) Guiding generative language models for data augmentation in few-shot text classification. arXiv preprint arXiv:2111.09064"},{"key":"9671_CR13","doi-asserted-by":"crossref","unstructured":"Elsahar H, Gravier C, Laforest F (2018) Zero-shot question generation from knowledge graphs for unseen predicates and entity types. arXiv preprint arXiv:1802.06842","DOI":"10.18653\/v1\/N18-1020"},{"key":"9671_CR14","doi-asserted-by":"crossref","unstructured":"Feng SY, Gangal V, Wei J, Chandar S, Vosoughi S, Mitamura T, Hovy E (2021) A survey of data augmentation approaches for nlp. In: Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021, pp. 968\u2013988","DOI":"10.18653\/v1\/2021.findings-acl.84"},{"issue":"5","key":"9671_CR15","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1037\/h0031619","volume":"76","author":"JL Fleiss","year":"1971","unstructured":"Fleiss JL (1971) Measuring nominal scale agreement among many raters. Psychol Bull 76(5):378","journal-title":"Psychol Bull"},{"key":"9671_CR16","doi-asserted-by":"crossref","unstructured":"Glaser I, Sadegharmaki S, Komboz B, Matthes F (2021) Data scarcity: Methods to improve the quality of text classification. In: ICPRAM, pp. 556\u2013564","DOI":"10.5220\/0010268005560564"},{"key":"9671_CR17","doi-asserted-by":"crossref","unstructured":"Glaser I, Scepankova E, Matthes F (2018) Classifying semantic types of legal sentences: Portability of machine learning models. In: Legal Knowledge and Information Systems, pp. 61\u201370","DOI":"10.3233\/978-1-61499-935-5-61"},{"key":"9671_CR18","doi-asserted-by":"crossref","unstructured":"Guo Z, Liu J, He T, Li Z, Zhangzhu P (2020) Taujud: test augmentation of machine learning in judicial documents. In: Proceedings of the 29th ACM SIGSOFT International Symposium on Software Testing and Analysis, pp. 549\u2013552","DOI":"10.1145\/3395363.3404364"},{"key":"9671_CR19","unstructured":"Guo H, Mao Y, Zhang R (2019) Augmenting Data with Mixup for Sentence Classification: An Empirical Study. arXiv"},{"key":"9671_CR20","unstructured":"Hsu H, Lachenbruch PA (2014) Paired t test. Wiley StatsRef: statistics reference online"},{"key":"9671_CR21","doi-asserted-by":"crossref","unstructured":"Jayasinghe S, Rambukkanage L, Silva A, Silva N, Perera AS (2021) Critical sentence identification in legal cases using multi-class classification. In: 2021 IEEE 16th International Conference on Industrial and Information Systems (ICIIS), pp. 146\u2013151. IEEE","DOI":"10.1109\/ICIIS53135.2021.9660657"},{"key":"9671_CR22","doi-asserted-by":"crossref","unstructured":"Kafle K, Yousefhussien M, Kanan C (2017) Data augmentation for visual question answering. In: Proceedings of the 10th International Conference on Natural Language Generation, pp. 198\u2013202","DOI":"10.18653\/v1\/W17-3529"},{"key":"9671_CR23","doi-asserted-by":"crossref","unstructured":"Kafle K, Yousefhussien M, Kanan C (2017) Data augmentation for visual question answering. In: Proceedings of the 10th International Conference on Natural Language Generation, pp. 198\u2013202. Association for Computational Linguistics, Santiago de Compostela, Spain","DOI":"10.18653\/v1\/W17-3529"},{"key":"9671_CR24","unstructured":"Kaushik D, Hovy E, Lipton ZC (2019) Learning the difference that makes a difference with counterfactually-augmented data. arXiv preprint arXiv:1909.12434"},{"key":"9671_CR25","unstructured":"Kenton JDM-WC, Toutanova LK (2019) Bert: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of naacL-HLT, vol. 1, p. 2"},{"key":"9671_CR26","unstructured":"Kingma DP, Ba J (2015) Adam: A method for stochastic optimization. In: Bengio, Y., LeCun, Y. (eds.) 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings"},{"key":"9671_CR27","doi-asserted-by":"crossref","unstructured":"Kobayashi S (2018) Contextual augmentation: Data augmentation by words with paradigmatic relations. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 2 (Short Papers), pp. 452\u2013457. Association for Computational Linguistics, New Orleans, Louisiana","DOI":"10.18653\/v1\/N18-2072"},{"key":"9671_CR28","doi-asserted-by":"crossref","unstructured":"Kumar V, Choudhary A, Cho E (2020) Data augmentation using pre-trained transformer models. In: Proceedings of the 2nd Workshop on Life-long Learning for Spoken Language Systems, pp. 18\u201326. Association for Computational Linguistics, Suzhou, China","DOI":"10.18653\/v1\/2020.lifelongnlp-1.3"},{"key":"9671_CR29","unstructured":"Lakshminath A (1990) Precedent in the Indian Legal System. Eastern Book Company, India. https:\/\/books.google.co.in\/books?id=QjI9AQAAIAAJ"},{"key":"9671_CR30","doi-asserted-by":"crossref","unstructured":"Lewis M, Liu Y, Goyal N, Ghazvininejad M, Mohamed A, Levy O, Stoyanov V, Zettlemoyer L (2019) Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"9671_CR31","unstructured":"Maat E, Krabben K, Winkels R (2010) Machine learning versus knowledge based classification of legal texts. In: Legal Knowledge and Information Systems, pp. 87\u201396"},{"key":"9671_CR32","unstructured":"Maat E, Winkels R Automated classification of norms in sources of law. In: Semantic Processing of Legal Texts"},{"key":"9671_CR33","doi-asserted-by":"crossref","unstructured":"Malik V, Sanjay R, Nigam SK, Ghosh K, Guha SK, Bhattacharya A, Modi A (2021) Ildc for cjpe: Indian legal documents corpus for court judgment prediction and explanation. arXiv preprint arXiv:2105.13562","DOI":"10.18653\/v1\/2021.acl-long.313"},{"key":"9671_CR34","doi-asserted-by":"crossref","unstructured":"Okimura I, Reid M, Kawano M, Matsuo Y (2022) On the impact of data augmentation on downstream performance in natural language processing. In: Proceedings of the Third Workshop on Insights from Negative Results in NLP, pp. 88\u201393","DOI":"10.18653\/v1\/2022.insights-1.12"},{"key":"9671_CR35","unstructured":"Papanikolaou Y, Pierleoni A (2020) Dare: Data augmented relation extraction with gpt-2. arXiv preprint arXiv:2004.13845"},{"key":"9671_CR36","doi-asserted-by":"crossref","unstructured":"Paul S, Mandal A, Goyal P, Ghosh S (2023) Pre-trained language models for the legal domain: A case study on indian law. In: Proceedings of 19th International Conference on Artificial Intelligence and Law - ICAIL 2023. https:\/\/arxiv.org\/abs\/2209.06049","DOI":"10.1145\/3594536.3595165"},{"key":"9671_CR37","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2022.109803","volume":"132","author":"LFAO Pellicer","year":"2023","unstructured":"Pellicer LFAO, Ferreira TM, Costa AHR (2023) Data augmentation techniques in natural language processing. Appl Soft Comput 132:109803","journal-title":"Appl Soft Comput"},{"key":"9671_CR38","unstructured":"Peric L, Mijic S, Stammbach D, Ash E (2020) Legal language modeling with transformers. In: Proceedings of the Fourth Workshop on Automated Semantic Analysis of Information in Legal Text (ASAIL 2020) Held Online in Conjunction with 33rd International Conference on Legal Knowledge and Information Systems (JURIX 2020) December 9, 2020, vol. 2764"},{"key":"9671_CR39","first-page":"2","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford A, Wu J, Amodei D, Amodei D, Clark J, Brundage M, Sutskever I (2019) Better language models and their implications. OpenAI blog 1:2","journal-title":"OpenAI blog"},{"key":"9671_CR40","unstructured":"Raffel C, Shazeer N, Roberts A, Lee K, Narang S, Matena M, Zhou Y, Li W, Liu PJ (2020) Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer"},{"key":"9671_CR41","unstructured":"Ratner AJ, De\u00a0Sa CM, Wu S, Selsam D, R\u00e9 C (2016) Data programming: Creating large training sets, quickly. Advances in neural information processing systems 29"},{"key":"9671_CR42","doi-asserted-by":"crossref","unstructured":"Sennrich R, Haddow B, Birch A (2016) Improving neural machine translation models with monolingual data. In: Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 86\u201396. Association for Computational Linguistics, Berlin, Germany","DOI":"10.18653\/v1\/P16-1009"},{"key":"9671_CR43","doi-asserted-by":"crossref","unstructured":"Sheik R, Gokul T, Nirmala S (2022) Efficient deep learning-based sentence boundary detection in legal text. In: Proceedings of the Natural Legal Language Processing Workshop 2022, pp. 208\u2013217","DOI":"10.18653\/v1\/2022.nllp-1.18"},{"key":"9671_CR44","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-021-00492-0","volume":"8","author":"C Shorten","year":"2021","unstructured":"Shorten C, Khoshgoftaar TM, Furht B (2021) Text data augmentation for deep learning. Journal of big Data 8:1\u201334","journal-title":"Journal of big Data"},{"issue":"1","key":"9671_CR45","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-021-00492-0","volume":"8","author":"C Shorten","year":"2021","unstructured":"Shorten C, Khoshgoftaar TM, Furht B (2021) Text data augmentation for deep learning. Journal of big Data 8(1):1\u201334","journal-title":"Journal of big Data"},{"key":"9671_CR46","doi-asserted-by":"crossref","unstructured":"Singh T, Kalra R, Mishra S, Satakshi Kumar M (2022) An efficient real-time stock prediction exploiting incremental learning and deep learning. Evolving Systems, 1\u201319","DOI":"10.1007\/s12530-022-09481-x"},{"key":"9671_CR47","unstructured":"Waltl B, Muhr J, Glaser I, Bonczek G, Scepankova E, Matthes F (2017) Classifying legal norms with active machine learning. In: JURIX, pp. 11\u201320"},{"issue":"1","key":"9671_CR48","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-016-0043-6","volume":"3","author":"K Weiss","year":"2016","unstructured":"Weiss K, Khoshgoftaar TM, Wang D (2016) A survey of transfer learning. Journal of Big data 3(1):1\u201340","journal-title":"Journal of Big data"},{"key":"9671_CR49","doi-asserted-by":"crossref","unstructured":"Wei J, Zou K (2019) EDA: Easy data augmentation techniques for boosting performance on text classification tasks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 6382\u20136388. Association for Computational Linguistics, Hong Kong, China","DOI":"10.18653\/v1\/D19-1670"},{"key":"9671_CR50","doi-asserted-by":"crossref","unstructured":"Wu X, Lv S, Zang L, Han J, Hu S (2019) Conditional bert contextual augmentation. In: International Conference on Computational Science, pp. 84\u201395. Springer","DOI":"10.1007\/978-3-030-22747-0_7"},{"key":"9671_CR51","doi-asserted-by":"crossref","unstructured":"Yan G, Li Y, Zhang S, Chen Z (2019) Data augmentation for deep learning of judgment documents. In: International Conference on Intelligent Science and Big Data Engineering, Springer, pp. 232\u2013242","DOI":"10.1007\/978-3-030-36204-1_19"},{"key":"9671_CR52","doi-asserted-by":"crossref","unstructured":"Yousri R, Moussa K, Elattar MA, Madian AH, Darweesh MS (2023) A novel data augmentation approach for ego-lane detection enhancement. Evolving Systems, 1\u201312","DOI":"10.1007\/s12530-023-09533-w"},{"key":"9671_CR53","unstructured":"Zhang D, Li T, Zhang H, Yin B (2020) On data augmentation for extreme multi-label classification. arXiv preprint arXiv:2009.10778"},{"key":"9671_CR54","unstructured":"Zhang X, Zhao J, LeCun Y (2015) Character-level convolutional networks for text classification. Advances in neural information processing systems 28"},{"key":"9671_CR55","doi-asserted-by":"crossref","unstructured":"Zheng L, Guha N, Anderson BR, Henderson P, Ho DE (2021) When does pretraining help? assessing self-supervised learning for law and the casehold dataset of 53,000+ legal holdings. In: Proceedings of the Eighteenth International Conference on Artificial Intelligence and Law, pp. 159\u2013168","DOI":"10.1145\/3462757.3466088"},{"key":"9671_CR56","doi-asserted-by":"crossref","unstructured":"Zhong H, Xiao C, Tu C, Zhang T, Liu Z, Sun M (2020) How does nlp benefit legal system: A summary of legal artificial intelligence. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 5218\u20135230","DOI":"10.18653\/v1\/2020.acl-main.466"}],"container-title":["Evolving Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12530-025-09671-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12530-025-09671-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12530-025-09671-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T06:58:17Z","timestamp":1757141897000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12530-025-09671-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,23]]},"references-count":56,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["9671"],"URL":"https:\/\/doi.org\/10.1007\/s12530-025-09671-3","relation":{},"ISSN":["1868-6478","1868-6486"],"issn-type":[{"type":"print","value":"1868-6478"},{"type":"electronic","value":"1868-6486"}],"subject":[],"published":{"date-parts":[[2025,2,23]]},"assertion":[{"value":"15 September 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 February 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 February 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors certify that they have no involvement in any firm or entity with any financial or non-financial interest in the materials covered in this manuscript.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"40"}}