{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T16:27:24Z","timestamp":1774542444926,"version":"3.50.1"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T00:00:00Z","timestamp":1710374400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T00:00:00Z","timestamp":1710374400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Law"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s10506-024-09394-x","type":"journal-article","created":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T10:03:48Z","timestamp":1710410628000},"page":"519-549","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Legal sentence boundary detection using hybrid deep learning and statistical models"],"prefix":"10.1007","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3567-9757","authenticated-orcid":false,"given":"Reshma","family":"Sheik","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sneha Rao","family":"Ganta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"S. Jaya","family":"Nirmala","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,3,14]]},"reference":[{"key":"9394_CR1","unstructured":"Agarap AF (2018) Deep learning using rectified linear units (ReLU). arXiv preprint arXiv:1803.08375"},{"key":"9394_CR2","doi-asserted-by":"crossref","unstructured":"Allen LE,  Lysaght LJ (2015) Modern logic as a tool for remedying ambiguities in legal documents and analyzing the structure of legal documents\u2019 contained definitions. Logic in the Theory and Practice of Lawmaking, 383\u2013407","DOI":"10.1007\/978-3-319-19575-9_14"},{"key":"9394_CR3","unstructured":"Bahdanau D, Cho KH, Bengio Y (2015) Neural machine translation by jointly learning to align and translate. In: 3rd international conference on learning representations. ICLR 2015"},{"key":"9394_CR4","doi-asserted-by":"publisher","DOI":"10.1007\/s10506-021-09304-5","author":"P Bhattacharya","year":"2021","unstructured":"Bhattacharya P, Paul S, Ghosh K, Ghosh S, Wyner A (2021) DeepRhole: deep learning for rhetorical role labeling of sentences in legal case documents. Artif Intell Law. https:\/\/doi.org\/10.1007\/s10506-021-09304-5","journal-title":"Artif Intell Law"},{"key":"9394_CR5","doi-asserted-by":"crossref","unstructured":"Bird S (2006) NLTK: the natural language toolkit. In: Proceedings of the COLING\/ACL 2006 interactive presentation sessions, pp 69\u201372","DOI":"10.3115\/1225403.1225421"},{"key":"9394_CR6","doi-asserted-by":"crossref","unstructured":"Brugger T, St\u00fcrmer M, Niklaus J (2023) MultiLegalSBD: a multilingual legal sentence boundary detection dataset. arXiv:2305.01211","DOI":"10.1145\/3594536.3595132"},{"key":"9394_CR7","doi-asserted-by":"crossref","unstructured":"Chalkidis I, Androutsopoulos I (2017) A deep learning approach to contract element extraction. In: JURIX, vol 2017, pp 155\u2013164","DOI":"10.3233\/978-1-61499-838-9-155"},{"key":"9394_CR8","doi-asserted-by":"crossref","unstructured":"Chalkidis I, Fergadiotis M, Malakasiotis P, Aletras N, Androutsopoulos I (2020) LEGAL-BERT: the Muppets straight out of law school. In: Findings of the association for computational linguistics: EMNLP 2020, pp 2898\u20132904","DOI":"10.18653\/v1\/2020.findings-emnlp.261"},{"issue":"2","key":"9394_CR9","doi-asserted-by":"publisher","first-page":"657","DOI":"10.1109\/TR.2022.3156126","volume":"71","author":"H Chen","year":"2022","unstructured":"Chen H, Pieptea LF, Ding J (2022) Construction and evaluation of a high-quality corpus for legal intelligence using semiautomated approaches. IEEE Trans Reliab 71(2):657\u2013673","journal-title":"IEEE Trans Reliab"},{"key":"9394_CR10","doi-asserted-by":"crossref","unstructured":"Chollampatt S, Ng HT (2018) A multilayer convolutional encoder-decoder neural network for grammatical error correction. In: Proceedings of the AAAI conference on artificial intelligence, vol 32","DOI":"10.1609\/aaai.v32i1.12069"},{"key":"9394_CR11","unstructured":"Chung J, Gulcehre C, Cho K, Bengio Y (2014) Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555"},{"key":"9394_CR12","first-page":"2493","volume":"12","author":"R Collobert","year":"2011","unstructured":"Collobert R, Weston J, Bottou L, Karlen M, Kavukcuoglu K, Kuksa P (2011) Natural language processing (almost) from scratch. J Mach Learn Res 12:2493\u20132537","journal-title":"J Mach Learn Res"},{"key":"9394_CR13","doi-asserted-by":"publisher","DOI":"10.1007\/s10506-023-09349-8","author":"A Deroy","year":"2023","unstructured":"Deroy A, Ghosh K, Ghosh S (2023) Ensemble methods for improving extractive summarization of legal case judgements. Artif Intell Law. https:\/\/doi.org\/10.1007\/s10506-023-09349-8","journal-title":"Artif Intell Law"},{"issue":"7","key":"9394_CR14","doi-asserted-by":"publisher","first-page":"1895","DOI":"10.1162\/089976698300017197","volume":"10","author":"TG Dietterich","year":"1998","unstructured":"Dietterich TG (1998) Approximate statistical tests for comparing supervised classification learning algorithms. Neural Comput 10(7):1895\u20131923","journal-title":"Neural Comput"},{"issue":"1","key":"9394_CR15","first-page":"61","volume":"19","author":"T Dunning","year":"1994","unstructured":"Dunning T (1994) Accurate methods for the statistics of surprise and coincidence. Comput Linguist 19(1):61\u201374","journal-title":"Comput Linguist"},{"issue":"2","key":"9394_CR16","first-page":"7","volume":"5","author":"WN Francis","year":"1979","unstructured":"Francis WN, Kucera H (1979) Brown corpus manual. Lett Editor 5(2):7","journal-title":"Lett Editor"},{"issue":"10","key":"9394_CR17","doi-asserted-by":"publisher","first-page":"2451","DOI":"10.1162\/089976600300015015","volume":"12","author":"FA Gers","year":"2000","unstructured":"Gers FA, Schmidhuber J, Cummins F (2000) Learning to forget: continual prediction with LSTM. Neural Comput 12(10):2451\u20132471","journal-title":"Neural Comput"},{"key":"9394_CR18","doi-asserted-by":"crossref","unstructured":"Gillick D (2009) Sentence boundary detection and the problem with the us. In: Proceedings of human language technologies: the 2009 annual conference of the North American chapter of the association for computational linguistics, companion volume: short papers, pp 241\u2013244","DOI":"10.3115\/1620853.1620920"},{"key":"9394_CR19","doi-asserted-by":"crossref","unstructured":"Glaser I, Moser S, Matthes F (2021) Sentence boundary detection in German legal documents. In: ICAART (2), pp 812\u2013821","DOI":"10.5220\/0010246308120821"},{"key":"9394_CR20","unstructured":"Grefenstette G, Tapanainen P (1994) What is a word, what is a sentence?: problems of tokenisation"},{"key":"9394_CR21","unstructured":"Griffis D, Shivade C, Fosler-Lussier E, Lai AM (2016) A quantitative and qualitative evaluation of sentence boundary detection for the clinical domain. In AMIA summits on translational science proceedings, vol 2016, p 88"},{"key":"9394_CR22","doi-asserted-by":"publisher","DOI":"10.1007\/s10506-023-09361-y","author":"I Habernal","year":"2023","unstructured":"Habernal I, Faber D, Recchia N, Bretthauer S, Gurevych I, Spiecker genannt D\u00f6hmann I, Burchard C (2023) Mining legal arguments in court decisions. Artif Intell Law. https:\/\/doi.org\/10.1007\/s10506-023-09361-y","journal-title":"Artif Intell Law"},{"key":"9394_CR23","unstructured":"Honnibal M, Montani I, Van\u00a0Landeghem S, Boyd A (2020) spaCy: industrial-strength Natural Language Processing in Python"},{"key":"9394_CR24","doi-asserted-by":"publisher","DOI":"10.1007\/s10506-023-09345-y","author":"D Jain","year":"2023","unstructured":"Jain D, Borah MD, Biswas A (2023) A sentence is known by the company it keeps: improving legal document summarization using deep clustering. Artif Intell Law. https:\/\/doi.org\/10.1007\/s10506-023-09345-y","journal-title":"Artif Intell Law"},{"key":"9394_CR25","unstructured":"Kingma DP, Ba J (2015) Adam: a method for stochastic optimization. In: Bengio Y, LeCun Y (eds) 3rd international conference on learning representations. ICLR 2015, San Diego, CA, USA, May 7\u20139, 2015, Conference Track Proceedings"},{"issue":"4","key":"9394_CR26","doi-asserted-by":"publisher","first-page":"485","DOI":"10.1162\/coli.2006.32.4.485","volume":"32","author":"T Kiss","year":"2006","unstructured":"Kiss T, Strunk J (2006) Unsupervised multilingual sentence boundary detection. Comput Linguist 32(4):485\u2013525","journal-title":"Comput Linguist"},{"key":"9394_CR27","doi-asserted-by":"crossref","unstructured":"Kudo T, Richardson J (2018) SentencePiece: a simple and language independent subword tokenizer and detokenizer for neural text processing. In: Proceedings of the 2018 conference on empirical methods in natural language processing: system demonstrations. Association for Computational Linguistics, pp 66\u201371","DOI":"10.18653\/v1\/D18-2012"},{"key":"9394_CR28","unstructured":"Lafferty J, McCallum A, Pereira FC (2001) Conditional random fields: probabilistic models for segmenting and labeling sequence data"},{"issue":"6","key":"9394_CR29","first-page":"277","volume":"7","author":"UA Lavery","year":"1921","unstructured":"Lavery UA (1921) The language of the law. Am Bar Assoc J 7(6):277\u2013283","journal-title":"Am Bar Assoc J"},{"key":"9394_CR30","unstructured":"Lin Z, Feng M, Santos CNd, Yu M, Xiang B, Zhou B, Bengio Y (2017) A structured self-attentive sentence embedding. arXiv preprint arXiv:1703.03130"},{"issue":"1\u20133","key":"9394_CR31","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1007\/BF01589116","volume":"45","author":"DC Liu","year":"1989","unstructured":"Liu DC, Nocedal J (1989) On the limited memory BFGS method for large scale optimization. Math Program 45(1\u20133):503\u2013528","journal-title":"Math Program"},{"key":"9394_CR32","doi-asserted-by":"crossref","unstructured":"L\u00f3pez R, Pardo TA (2015) Experiments on sentence boundary detection in user-generated web content. In: Computational linguistics and intelligent text processing: 16th international conference, CICLing 2015, Cairo, Egypt, April 14\u201320, 2015, Springer proceedings, Part I 16, pp 227\u2013237","DOI":"10.1007\/978-3-319-18111-0_18"},{"key":"9394_CR33","doi-asserted-by":"crossref","unstructured":"Malik V, Sanjay R, Nigam SK, Ghosh K, Guha SK, Bhattacharya A, Modi A (2021) ILDC for CJPE: Indian legal documents corpus for court judgment prediction and explanation. In: Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing (volume 1: long papers). Association for Computational Linguistics, Online, pp 4046\u20134062","DOI":"10.18653\/v1\/2021.acl-long.313"},{"key":"9394_CR34","doi-asserted-by":"publisher","DOI":"10.1007\/s10506-021-09296-2","author":"A Mandal","year":"2021","unstructured":"Mandal A, Ghosh K, Ghosh S, Mandal S (2021) A sequence labeling model for catchphrase identification from legal case documents. Artif Intell Law. https:\/\/doi.org\/10.1007\/s10506-021-09296-2","journal-title":"Artif Intell Law"},{"key":"9394_CR35","doi-asserted-by":"crossref","unstructured":"Manning CD, Surdeanu M, Bauer J, Finkel JR, Bethard S, McClosky D (2014) The Stanford CoreNLP natural language processing toolkit. In: Proceedings of 52nd annual meeting of the association for computational linguistics: system demonstrations, pp 55\u201360","DOI":"10.3115\/v1\/P14-5010"},{"issue":"2","key":"9394_CR36","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/BF02295996","volume":"12","author":"Q McNemar","year":"1947","unstructured":"McNemar Q (1947) Note on the sampling error of the difference between correlated proportions or percentages. Psychometrika 12(2):153\u2013157","journal-title":"Psychometrika"},{"key":"9394_CR37","unstructured":"Mikheev A (2000) Tagging sentence boundaries. In: 1st meeting of the North American chapter of the association for computational linguistics"},{"key":"9394_CR38","doi-asserted-by":"crossref","unstructured":"Minixhofer B, Pfeiffer J, Vuli\u0107 I (2023) Where\u2019s the point? Self-supervised multilingual punctuation-agnostic sentence segmentation. arXiv preprint arXiv:2305.18893","DOI":"10.18653\/v1\/2023.acl-long.398"},{"key":"9394_CR39","unstructured":"Okazaki N (2007) CRFsuite: a fast implementation of conditional random fields (CRFs)"},{"issue":"3","key":"9394_CR40","doi-asserted-by":"publisher","first-page":"1681","DOI":"10.1007\/s10462-021-10051-x","volume":"55","author":"V P\u0103i\u015f","year":"2021","unstructured":"P\u0103i\u015f V, Tufi\u015f D (2021) Capitalization and punctuation restoration: a survey. Artif Intell Rev 55(3):1681\u20131722","journal-title":"Artif Intell Rev"},{"issue":"2","key":"9394_CR41","first-page":"241","volume":"23","author":"DD Palmer","year":"1997","unstructured":"Palmer DD, Hearst MA (1997) Adaptive multilingual sentence boundary disambiguation. Comput Linguist 23(2):241\u2013267","journal-title":"Comput Linguist"},{"key":"9394_CR42","doi-asserted-by":"crossref","unstructured":"Qi P, Zhang Y, Zhang Y, Bolton J, Manning CD (2020) Stanza: a Python natural language processing toolkit for many human languages. In: Proceedings of the 58th annual meeting of the association for computational linguistics: system demonstrations","DOI":"10.18653\/v1\/2020.acl-demos.14"},{"key":"9394_CR43","unstructured":"Read J, Dridan R, Oepen S, Solberg LJ (2012) Sentence boundary detection: a long solved problem? In: Proceedings of COLING 2012. Posters, pp 985\u2013994"},{"key":"9394_CR44","unstructured":"Rehbein I, Ruppenhofer J, Schmidt T (2020) Improving sentence boundary detection for spoken language transcripts. In: Proceedings of the twelfth language resources and evaluation conference. European Language Resources Association, Marseille, France, pp 7102\u20137111"},{"key":"9394_CR45","doi-asserted-by":"crossref","unstructured":"Reynar JC, Ratnaparkhi A (1997) A maximum entropy approach to identifying sentence boundaries. In: Proceedings of the fifth conference on applied natural language processing, pp 16\u201319","DOI":"10.3115\/974557.974561"},{"key":"9394_CR46","doi-asserted-by":"crossref","unstructured":"Riley M (1989) Some applications of tree-based modelling to speech and language. In: Speech and natural language: proceedings of a workshop held at Cape Cod, Massachusetts, October 15\u201318, 1989","DOI":"10.3115\/1075434.1075492"},{"key":"9394_CR47","unstructured":"Rudrapal D, Jamatia A, Chakma K, Das A, Gamb\u00e4ck B (2015) Sentence boundary detection for social media text. In: Proceedings of the 12th international conference on natural language processing, pp 254\u2013260"},{"key":"9394_CR48","doi-asserted-by":"crossref","unstructured":"Sadvilkar N, Neumann M (2020) PySBD: pragmatic sentence boundary disambiguation. In: Proceedings of second workshop for NLP open source software (NLP-OSS), pp 110\u2013114","DOI":"10.18653\/v1\/2020.nlposs-1.15"},{"key":"9394_CR49","doi-asserted-by":"crossref","unstructured":"Sanchez G (2019) Sentence boundary detection in legal text. In: Proceedings of the natural legal language processing workshop 2019, pp 31\u201338","DOI":"10.18653\/v1\/W19-2204"},{"key":"9394_CR50","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1007\/s10506-010-9087-7","volume":"18","author":"M Saravanan","year":"2010","unstructured":"Saravanan M, Ravindran B (2010) Identification of rhetorical roles for segmentation and summarization of a legal judgment. Artif Intell Law 18:45\u201376","journal-title":"Artif Intell Law"},{"key":"9394_CR51","first-page":"21","volume":"58","author":"J Savelka","year":"2017","unstructured":"Savelka J, Walker VR, Grabmair M, Ashley KD (2017) Sentence boundary detection in adjudicatory decisions in the United States. Trait Autom Lang 58:21","journal-title":"Trait Autom Lang"},{"key":"9394_CR52","unstructured":"Schweter S, Ahmed S (2019) Deep-EOS: general-purpose neural networks for sentence boundary detection. In: KONVENS"},{"key":"9394_CR53","doi-asserted-by":"crossref","unstructured":"Sheik R, Gokul T, Nirmala S (2022) Efficient deep learning-based sentence boundary detection in legal text. In: Proceedings of the natural legal language processing workshop 2022, pp 208\u2013217","DOI":"10.18653\/v1\/2022.nllp-1.18"},{"key":"9394_CR54","doi-asserted-by":"crossref","unstructured":"Sutton C, McCallum A et al (2012) An introduction to conditional random fields. Found Trends\u00ae Mach Learn 4(4):267\u2013373","DOI":"10.1561\/2200000013"},{"key":"9394_CR55","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In:\nProceedings of the 31st International Conference on Neural Information Processing Systems. NIPS'17, pp 6000\u20136010"},{"key":"9394_CR56","unstructured":"Wang X, Utiyama M, Sumita E (2019) Online sentence segmentation for simultaneous interpretation using multi-shifted recurrent neural network. In: Proceedings of machine translation summit XVII: research track, pp 1\u201311"},{"key":"9394_CR57","doi-asserted-by":"crossref","unstructured":"Wicks R, Post M (2021) A unified approach to sentence segmentation of punctuated text in many languages. In: Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing (volume 1: long papers), pp 3995\u20134007","DOI":"10.18653\/v1\/2021.acl-long.309"},{"key":"9394_CR58","doi-asserted-by":"crossref","unstructured":"Wong F, Chao S (2010) iSentenizer: an incremental sentence boundary classifier. In: Proceedings of the 6th international conference on natural language processing and knowledge engineering (NLPKE-2010). IEEE, pp 1\u20137","DOI":"10.1109\/NLPKE.2010.5587856"},{"issue":"3","key":"9394_CR59","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1109\/MCI.2018.2840738","volume":"13","author":"T Young","year":"2018","unstructured":"Young T, Hazarika D, Poria S, Cambria E (2018) Recent trends in deep learning based natural language processing. IEEE Comput Intell Mag 13(3):55\u201375","journal-title":"IEEE Comput Intell Mag"},{"key":"9394_CR60","doi-asserted-by":"crossref","unstructured":"Zheng L, Guha N, Anderson BR, Henderson P, Ho DE (2021) When does pretraining help? Assessing self-supervised learning for law and the casehold dataset of 53,000+ legal holdings. In: Proceedings of the eighteenth international conference on artificial intelligence and law, pp 159\u2013168","DOI":"10.1145\/3462757.3466088"}],"container-title":["Artificial Intelligence and Law"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10506-024-09394-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10506-024-09394-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10506-024-09394-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T12:38:09Z","timestamp":1749040689000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10506-024-09394-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,14]]},"references-count":60,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["9394"],"URL":"https:\/\/doi.org\/10.1007\/s10506-024-09394-x","relation":{},"ISSN":["0924-8463","1572-8382"],"issn-type":[{"value":"0924-8463","type":"print"},{"value":"1572-8382","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,14]]},"assertion":[{"value":"13 February 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 March 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors certify that they have no involvement in any firm or entity with any financial or non-financial interest in the materials covered in this document.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}