{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T20:04:04Z","timestamp":1773777844608,"version":"3.50.1"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T00:00:00Z","timestamp":1764892800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T00:00:00Z","timestamp":1764892800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"TERMITRAD"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Digit Libr"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s00799-025-00437-5","type":"journal-article","created":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T09:04:14Z","timestamp":1764925454000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["STRAS: a semantic textual-cues leveraged Rule-based approach for article separation in historical newspapers"],"prefix":"10.1007","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1009-3875","authenticated-orcid":false,"given":"Nancy","family":"Girdhar","sequence":"first","affiliation":[]},{"given":"Micka\u00ebl","family":"Coustaty","sequence":"additional","affiliation":[]},{"given":"Antoine","family":"Doucet","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,5]]},"reference":[{"key":"437_CR1","doi-asserted-by":"crossref","unstructured":"An, C., Yin, D., Baird, H.S.: Document segmentation using pixel-accurate ground truth. In: 2010 20th International Conference on Pattern Recognition, pp. 245\u2013248 (2010). IEEE","DOI":"10.1109\/ICPR.2010.69"},{"key":"437_CR2","doi-asserted-by":"crossref","unstructured":"Hebert, D., Paquet, T., Nicolas, S.: Continuous crf with multi-scale quantization feature functions application to structure extraction in old newspaper. In: 2011 International Conference on Document Analysis and Recognition, pp. 493\u2013497 (2011). IEEE","DOI":"10.1109\/ICDAR.2011.105"},{"key":"437_CR3","doi-asserted-by":"crossref","unstructured":"Palfray, T., Hebert, D., Nicolas, S., Tranouez, P., Paquet, T.: Logical segmentation for article extraction in digitized old newspapers. In: Proceedings of the 2012 ACM Symposium on Document Engineering, pp. 129\u2013132 (2012)","DOI":"10.1145\/2361354.2361383"},{"key":"437_CR4","doi-asserted-by":"crossref","unstructured":"Girdhar, N., Coustaty, M., Doucet, A.: Digitizing history: Transitioning historical paper documents to digital content for information retrieval and mining\u2014a comprehensive survey. IEEE Transactions on Computational Social Systems (2024)","DOI":"10.1109\/TCSS.2024.3378419"},{"key":"437_CR5","doi-asserted-by":"crossref","unstructured":"Meier, B., Stadelmann, T., Stampfli, J., Arnold, M., Cieliebak, M.: Fully convolutional neural networks for newspaper article segmentation. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol. 1, pp. 414\u2013419 (2017). IEEE","DOI":"10.1109\/ICDAR.2017.75"},{"key":"437_CR6","doi-asserted-by":"crossref","unstructured":"Gatos, B., Pratikakis, I., Perantonis, S.J.: Efficient binarization of historical and degraded document images. In: 2008 The Eighth IAPR International Workshop on Document Analysis Systems, pp. 447\u2013454 (2008). IEEE","DOI":"10.1109\/DAS.2008.66"},{"issue":"10","key":"437_CR7","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1016\/0031-3203(94)90068-X","volume":"27","author":"DS Le","year":"1994","unstructured":"Le, D.S., Thoma, G.R., Wechsler, H.: Automated page orientation and skew angle detection for binary document images. Pattern Recogn. 27(10), 1325\u20131344 (1994)","journal-title":"Pattern Recogn."},{"key":"437_CR8","doi-asserted-by":"crossref","unstructured":"Sezgin, M., Sankur, B.l.: Survey over image thresholding techniques and quantitative performance evaluation. Journal of Electronic imaging 13(1), 146\u2013168 (2004)","DOI":"10.1117\/1.1631315"},{"key":"437_CR9","doi-asserted-by":"crossref","unstructured":"Boillet, M., Kermorvant, C., Paquet, T.: Multiple document datasets pre-training improves text line detection with deep neural networks. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 2134\u20132141 (2021). IEEE","DOI":"10.1109\/ICPR48806.2021.9412447"},{"issue":"2","key":"437_CR10","first-page":"225","volume":"73","author":"S Oberbichler","year":"2022","unstructured":"Oberbichler, S., Boro\u015f, E., Doucet, A., Marjanen, J., Pfanzelter, E., Rautiainen, J., Toivonen, H., Tolonen, M.: Integrated interdisciplinary workflows for research on historical newspapers: Perspectives from humanities scholars, computer scientists, and librarians. J. Am. Soc. Inf. Sci. 73(2), 225\u2013239 (2022)","journal-title":"J. Am. Soc. Inf. Sci."},{"key":"437_CR11","doi-asserted-by":"crossref","unstructured":"Gonz\u00e1lez-Gallardo, C.-E., Boros, E., Giamphy, E., Hamdi, A., Moreno, J.G., Doucet, A.: Injecting temporal-aware knowledge in historical named entity recognition. In: Advances in Information Retrieval: 45th European Conference on Information Retrieval, ECIR 2023, Dublin, Ireland, April 2\u20136, 2023, Proceedings, Part I, pp. 377\u2013393 (2023). Springer","DOI":"10.1007\/978-3-031-28244-7_24"},{"key":"437_CR12","doi-asserted-by":"crossref","unstructured":"Manjavacas, E., Fonteyn, L.: Adapting vs. pre-training language models for historical languages. Journal of Data Mining & Digital Humanities, 1\u201319 (2022)","DOI":"10.46298\/jdmdh.9152"},{"key":"437_CR13","doi-asserted-by":"crossref","unstructured":"Gatos, B., Mantzaris, S., Chandrinos, K., Tsigris, A., Perantonis, S.J.: Integrated algorithms for newspaper page decomposition and article tracking. In: Proceedings of the Fifth International Conference on Document Analysis and Recognition. ICDAR\u201999 (Cat. No. PR00318), pp. 559\u2013562 (1999). IEEE","DOI":"10.1109\/ICDAR.1999.791849"},{"key":"437_CR14","unstructured":"Michael, J., Weidemann, L.R. Max, Doucet, A.: NewsEye: A Digital Investigator for Historical Newspapers. https:\/\/www.newseye.eu\/fileadmin\/deliverables\/NewsEye-T23-D27-ArticleSeparation-c-final-Submitted-v6.0.pdf. (Accessed on 05\/26\/2023) (2022)"},{"key":"437_CR15","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)"},{"key":"437_CR16","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. Transactions of the association for computational linguistics 5, 135\u2013146 (2017)","journal-title":"Transactions of the association for computational linguistics"},{"key":"437_CR17","doi-asserted-by":"crossref","unstructured":"Conneau, A., Kiela, D., Schwenk, H., Barrault, L., Bordes, A.: Supervised learning of universal sentence representations from natural language inference data. arXiv preprint arXiv:1705.02364 (2017)","DOI":"10.18653\/v1\/D17-1070"},{"key":"437_CR18","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084 (2019)","DOI":"10.18653\/v1\/D19-1410"},{"issue":"6","key":"437_CR19","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Commun. ACM 60(6), 84\u201390 (2017)","journal-title":"Commun. ACM"},{"key":"437_CR20","unstructured":"Pinheiro, P., Collobert, R.: Recurrent convolutional neural networks for scene labeling. In: International Conference on Machine Learning, pp. 82\u201390 (2014). PMLR"},{"issue":"2","key":"437_CR21","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1007\/s10032-022-00395-7","volume":"25","author":"M Boillet","year":"2022","unstructured":"Boillet, M., Kermorvant, C., Paquet, T.: Robust text line detection in historical documents: learning and evaluation methods. International Journal on Document Analysis and Recognition (IJDAR) 25(2), 95\u2013114 (2022)","journal-title":"International Journal on Document Analysis and Recognition (IJDAR)"},{"key":"437_CR22","doi-asserted-by":"crossref","unstructured":"Bansal, A., Chaudhury, S., Roy, S.D., Srivastava, J.: Newspaper article extraction using hierarchical fixed point model. In: 2014 11th IAPR International Workshop on Document Analysis Systems, pp. 257\u2013261 (2014). IEEE","DOI":"10.1109\/DAS.2014.42"},{"key":"437_CR23","unstructured":"Hadjar, K., Hitz, O., Ingold, R.: Newspaper page decomposition using a split and merge approach. In: Proceedings of Sixth International Conference on Document Analysis and Recognition, pp. 1186\u20131189 (2001). IEEE"},{"key":"437_CR24","doi-asserted-by":"crossref","unstructured":"Naoum, A., Nothman, J., Curran, J.: Article segmentation in digitised newspapers with a 2d markov model. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1007\u20131014 (2019). IEEE","DOI":"10.1109\/ICDAR.2019.00165"},{"key":"437_CR25","doi-asserted-by":"crossref","unstructured":"Kettunen, K., Ruokolainen, T., Liukkonen, E., Tranouez, P., Antelme, D., Paquet, T.: Detecting articles in a digitized finnish historical newspaper collection 1771-1929: early results using the pivaj software. In: Proceedings of the 3rd International Conference on Digital Access to Textual Cultural Heritage, pp. 59\u201364 (2019)","DOI":"10.1145\/3322905.3322911"},{"key":"437_CR26","doi-asserted-by":"crossref","unstructured":"Barman, R., Ehrmann, M., Clematide, S., Oliveira, S.A., Kaplan, F.: Combining visual and textual features for semantic segmentation of historical newspapers. Journal of Data Mining & Digital Humanities (HistoInformatics) (2021)","DOI":"10.46298\/jdmdh.6107"},{"key":"437_CR27","doi-asserted-by":"crossref","unstructured":"Girdhar, N., Sharma, D., Coustaty, M., Doucet, A.: Leveraging transfer learning for article segmentation in historical newspapers. In: The 28th International Conference on Theory and Practice of Digital Libraries (2024)","DOI":"10.1007\/978-3-031-72437-4_13"},{"key":"437_CR28","doi-asserted-by":"crossref","unstructured":"Hebert, D., Palfray, T., Nicolas, S., Tranouez, P., Paquet, T.: Automatic article extraction in old newspapers digitized collections. In: Proceedings of the First International Conference on Digital Access to Textual Cultural Heritage, pp. 3\u20138 (2014)","DOI":"10.1145\/2595188.2595195"},{"key":"437_CR29","doi-asserted-by":"crossref","unstructured":"Yang, X., Yumer, E., Asente, P., Kraley, M., Kifer, D., Lee\u00a0Giles, C.: Learning to extract semantic structure from documents using multimodal fully convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5315\u20135324 (2017)","DOI":"10.1109\/CVPR.2017.462"},{"key":"437_CR30","doi-asserted-by":"crossref","unstructured":"Iyyer, M., Manjunatha, V., Boyd-Graber, J., Daum\u00e9\u00a0III, H.: Deep unordered composition rivals syntactic methods for text classification. In: Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (volume 1: Long Papers), pp. 1681\u20131691 (2015)","DOI":"10.3115\/v1\/P15-1162"},{"key":"437_CR31","doi-asserted-by":"crossref","unstructured":"Joulin, A., Grave, E., Bojanowski, P., Mikolov, T.: Bag of tricks for efficient text classification. arXiv preprint arXiv:1607.01759 (2016)","DOI":"10.18653\/v1\/E17-2068"},{"key":"437_CR32","unstructured":"Doucet, A., Gasteiner, M., Granroth-Wilding, M., Kaiser, M., Kaukonen, M., Labahn, R., Moreux, J.-P., Muehlberger, G., Pfanzelter, E., Th\u00e9renty, M.-\u00c8., et al.: Newseye: A digital investigator for historical newspapers. In: 15th Annual International Conference of the Alliance of Digital Humanities Organizations, DH 2020 (2020)"},{"key":"437_CR33","doi-asserted-by":"crossref","unstructured":"Girdhar, N., Coustaty, M., Doucet, A.: Benchmarking nas for article separation in historical newspapers. In: International Conference on Asian Digital Libraries, pp. 76\u201388 (2023). Springer","DOI":"10.1007\/978-981-99-8085-7_7"},{"key":"437_CR34","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"437_CR35","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1016\/j.procs.2013.05.200","volume":"18","author":"G Andrade","year":"2013","unstructured":"Andrade, G., Ramos, G., Madeira, D., Sachetto, R., Ferreira, R., Rocha, L.: G-dbscan: A gpu accelerated algorithm for density-based clustering. Procedia Computer Science 18, 369\u2013378 (2013)","journal-title":"Procedia Computer Science"},{"key":"437_CR36","unstructured":"Ester, M., Kriegel, H.-P., Sander, J., Xu, X., et al.: A density-based algorithm for discovering clusters in large spatial databases with noise. In: Kdd, vol. 96, pp. 226\u2013231 (1996)"},{"key":"437_CR37","doi-asserted-by":"publisher","unstructured":"Muehlberger, G., Hackl, G.: NewsEye \/ READ AS Training Dataset from French Newspapers (19th, Early 20th C.). https:\/\/doi.org\/10.5281\/zenodo.4600636","DOI":"10.5281\/zenodo.4600636"},{"key":"437_CR38","doi-asserted-by":"publisher","unstructured":"Muehlberger, G., Hackl, G.: NewsEye \/ READ AS Training Dataset from Finnish Newspapers (19th C.). https:\/\/doi.org\/10.5281\/zenodo.4600746","DOI":"10.5281\/zenodo.4600746"}],"container-title":["International Journal on Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-025-00437-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00799-025-00437-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-025-00437-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T17:01:15Z","timestamp":1773766875000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00799-025-00437-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,5]]},"references-count":38,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["437"],"URL":"https:\/\/doi.org\/10.1007\/s00799-025-00437-5","relation":{},"ISSN":["1432-5012","1432-1300"],"issn-type":[{"value":"1432-5012","type":"print"},{"value":"1432-1300","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,5]]},"assertion":[{"value":"31 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 August 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 November 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 December 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Not applicable, as this research does not involve human or animal subjects.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}}],"article-number":"2"}}