{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T15:44:27Z","timestamp":1758123867837,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":33,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819980840"},{"type":"electronic","value":"9789819980857"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-981-99-8085-7_8","type":"book-chapter","created":{"date-parts":[[2023,11,29]],"date-time":"2023-11-29T05:02:07Z","timestamp":1701234127000},"page":"89-105","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["STRAS: A Semantic Textual-Cues Leveraged Rule-Based Approach for\u00a0Article Separation in\u00a0Historical Newspapers"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1009-3875","authenticated-orcid":false,"given":"Nancy","family":"Girdhar","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0123-439X","authenticated-orcid":false,"given":"Micka\u00ebl","family":"Coustaty","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6160-3356","authenticated-orcid":false,"given":"Antoine","family":"Doucet","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,30]]},"reference":[{"key":"8_CR1","doi-asserted-by":"crossref","unstructured":"An, C., Yin, D., Baird, H.S.: Document segmentation using pixel-accurate ground truth. In: 2010 20th International Conference on Pattern Recognition, pp. 245\u2013248. IEEE (2010)","DOI":"10.1109\/ICPR.2010.69"},{"key":"8_CR2","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1016\/j.procs.2013.05.200","volume":"18","author":"G Andrade","year":"2013","unstructured":"Andrade, G., Ramos, G., Madeira, D., Sachetto, R., Ferreira, R., Rocha, L.: G-DBSCAN: a GPU accelerated algorithm for density-based clustering. Procedia Comput. Sci. 18, 369\u2013378 (2013)","journal-title":"Procedia Comput. Sci."},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Bansal, A., Chaudhury, S., Roy, S.D., Srivastava, J.: Newspaper article extraction using hierarchical fixed point model. In: 2014 11th IAPR International Workshop on Document Analysis Systems, pp. 257\u2013261. IEEE (2014)","DOI":"10.1109\/DAS.2014.42"},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Boillet, M., Kermorvant, C., Paquet, T.: Multiple document datasets pre-training improves text line detection with deep neural networks. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 2134\u20132141. IEEE (2021)","DOI":"10.1109\/ICPR48806.2021.9412447"},{"key":"8_CR5","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1007\/s10032-022-00395-7","volume":"25","author":"M Boillet","year":"2022","unstructured":"Boillet, M., Kermorvant, C., Paquet, T.: Robust text line detection in historical documents: learning and evaluation methods. IJDAR 25, 95\u2013114 (2022)","journal-title":"IJDAR"},{"key":"8_CR6","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. Trans. Assoc. Comput. Linguist. 5, 135\u2013146 (2017)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Conneau, A., Kiela, D., Schwenk, H., Barrault, L., Bordes, A.: Supervised learning of universal sentence representations from natural language inference data. arXiv preprint arXiv:1705.02364 (2017)","DOI":"10.18653\/v1\/D17-1070"},{"key":"8_CR8","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"8_CR9","unstructured":"Doucet, A., et al.: NewsEye: a digital investigator for historical newspapers. In: 15th Annual International Conference of the Alliance of Digital Humanities Organizations, DH 2020 (2020)"},{"key":"8_CR10","unstructured":"Ester, M., Kriegel, H.P., Sander, J., Xu, X., et al.: A density-based algorithm for discovering clusters in large spatial databases with noise. In: KDD 1996: Proceedings of the Second International Conference on Knowledge Discovery and Data Mining, vol. 96, pp. 226\u2013231 (1996)"},{"key":"8_CR11","doi-asserted-by":"crossref","unstructured":"Gatos, B., Mantzaris, S., Chandrinos, K., Tsigris, A., Perantonis, S.J.: Integrated algorithms for newspaper page decomposition and article tracking. In: Proceedings of the Fifth International Conference on Document Analysis and Recognition. ICDAR\u201999 (Cat. No. PR00318), pp. 559\u2013562. IEEE (1999)","DOI":"10.1109\/ICDAR.1999.791849"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Gatos, B., Pratikakis, I., Perantonis, S.J.: Efficient binarization of historical and degraded document images. In: 2008 The Eighth IAPR International Workshop on Document Analysis Systems, pp. 447\u2013454. IEEE (2008)","DOI":"10.1109\/DAS.2008.66"},{"key":"8_CR13","doi-asserted-by":"publisher","unstructured":"Gonz\u00e1lez-Gallardo, C.E., Boros, E., Giamphy, E., Hamdi, A., Moreno, J.G., Doucet, A.: Injecting temporal-aware knowledge in historical named entity recognition. In: Kamps, J., et al. Advances in Information Retrieval. ECIR 2023. Lecture Notes in Computer Science, vol. 13980, pp. 377\u2013393. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-28244-7_24","DOI":"10.1007\/978-3-031-28244-7_24"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Hebert, D., Paquet, T., Nicolas, S.: Continuous CRF with multi-scale quantization feature functions application to structure extraction in old newspaper. In: 2011 International Conference on Document Analysis and Recognition, pp. 493\u2013497. IEEE (2011)","DOI":"10.1109\/ICDAR.2011.105"},{"key":"8_CR15","doi-asserted-by":"crossref","unstructured":"Iyyer, M., Manjunatha, V., Boyd-Graber, J., Daum\u00e9 III, H.: Deep unordered composition rivals syntactic methods for text classification. In: Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (volume 1: Long papers), pp. 1681\u20131691 (2015)","DOI":"10.3115\/v1\/P15-1162"},{"key":"8_CR16","doi-asserted-by":"crossref","unstructured":"Joulin, A., Grave, E., Bojanowski, P., Mikolov, T.: Bag of tricks for efficient text classification. arXiv preprint arXiv:1607.01759 (2016)","DOI":"10.18653\/v1\/E17-2068"},{"issue":"6","key":"8_CR17","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. Commun. ACM 60(6), 84\u201390 (2017)","journal-title":"Commun. ACM"},{"issue":"10","key":"8_CR18","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1016\/0031-3203(94)90068-X","volume":"27","author":"DS Le","year":"1994","unstructured":"Le, D.S., Thoma, G.R., Wechsler, H.: Automated page orientation and skew angle detection for binary document images. Pattern Recogn. 27(10), 1325\u20131344 (1994)","journal-title":"Pattern Recogn."},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"8_CR20","doi-asserted-by":"crossref","unstructured":"Manjavacas, E., Fonteyn, L.: Adapting vs. pre-training language models for historical languages. J. Data Min. Digit. Humanit. 1\u201319 (2022)","DOI":"10.46298\/jdmdh.9152"},{"key":"8_CR21","doi-asserted-by":"crossref","unstructured":"Meier, B., Stadelmann, T., Stampfli, J., Arnold, M., Cieliebak, M.: Fully convolutional neural networks for newspaper article segmentation. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol. 1, pp. 414\u2013419. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.75"},{"key":"8_CR22","unstructured":"Michael, J., Weidemann, Max, L.R., Doucet, A.: NewsEye: a digital investigator for historical newspapers (2022). www.newseye.eu\/fileadmin\/deliverables\/NewsEye-T23-D27-ArticleSeparation-c-final-Submitted-v6.0.pdf. Accessed 26 May 2023"},{"key":"8_CR23","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)"},{"key":"8_CR24","doi-asserted-by":"publisher","unstructured":"Muehlberger, G., Hackl, G.: NewsEye \/ READ AS training dataset from French newspapers (19th, early 20th C.) (2021). https:\/\/doi.org\/10.5281\/zenodo.4600636","DOI":"10.5281\/zenodo.4600636"},{"key":"8_CR25","doi-asserted-by":"publisher","unstructured":"Muehlberger, G., Hackl, G.: NewsEye \/ READ AS training dataset from finnish newspapers (19th C.) (2021). https:\/\/doi.org\/10.5281\/zenodo.4600746","DOI":"10.5281\/zenodo.4600746"},{"key":"8_CR26","doi-asserted-by":"crossref","unstructured":"Naoum, A., Nothman, J., Curran, J.: Article segmentation in digitised newspapers with a 2D Markov model. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1007\u20131014. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00165"},{"key":"8_CR27","doi-asserted-by":"crossref","unstructured":"Noh, H., Hong, S., Han, B.: Learning deconvolution network for semantic segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1520\u20131528 (2015)","DOI":"10.1109\/ICCV.2015.178"},{"issue":"2","key":"8_CR28","first-page":"225","volume":"73","author":"S Oberbichler","year":"2022","unstructured":"Oberbichler, S., et al.: Integrated interdisciplinary workflows for research on historical newspapers: perspectives from humanities scholars, computer scientists, and librarians. J. Am. Soc. Inf. Sci. 73(2), 225\u2013239 (2022)","journal-title":"J. Am. Soc. Inf. Sci."},{"key":"8_CR29","doi-asserted-by":"crossref","unstructured":"Palfray, T., Hebert, D., Nicolas, S., Tranouez, P., Paquet, T.: Logical segmentation for article extraction in digitized old newspapers. In: Proceedings of the 2012 ACM Symposium on Document Engineering, pp. 129\u2013132 (2012)","DOI":"10.1145\/2361354.2361383"},{"key":"8_CR30","unstructured":"Pinheiro, P., Collobert, R.: Recurrent convolutional neural networks for scene labeling. In: International Conference on Machine Learning, pp. 82\u201390. PMLR (2014)"},{"key":"8_CR31","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-bert: sentence embeddings using Siamese bert-networks. arXiv preprint arXiv:1908.10084 (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"8_CR32","doi-asserted-by":"crossref","unstructured":"Sezgin, M., Sankur, B.l.: Survey over image thresholding techniques and quantitative performance evaluation. J. Electron. Imaging 13(1), 146\u2013168 (2004)","DOI":"10.1117\/1.1631315"},{"key":"8_CR33","doi-asserted-by":"crossref","unstructured":"Yang, X., Yumer, E., Asente, P., Kraley, M., Kifer, D., Lee Giles, C.: Learning to extract semantic structure from documents using multimodal fully convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5315\u20135324 (2017)","DOI":"10.1109\/CVPR.2017.462"}],"container-title":["Lecture Notes in Computer Science","Leveraging Generative Intelligence in Digital Libraries: Towards Human-Machine Collaboration"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8085-7_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,29]],"date-time":"2023-11-29T05:06:41Z","timestamp":1701234401000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8085-7_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9789819980840","9789819980857"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8085-7_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"30 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICADL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Asian Digital Libraries","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taipei","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiwan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 December 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 December 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icadl2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icadl.net\/icadl2023\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"85","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"15","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"18% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,01","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1,92","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2 practice papers and 12 poster papers","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}