{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:07:00Z","timestamp":1765544820578,"version":"3.40.3"},"publisher-location":"Cham","reference-count":18,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030916688"},{"type":"electronic","value":"9783030916695"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-91669-5_30","type":"book-chapter","created":{"date-parts":[[2021,11,30]],"date-time":"2021-11-30T00:03:08Z","timestamp":1638230588000},"page":"392-400","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Evaluating the Robustness of Embedding-Based Topic Models to OCR Noise"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2482-0663","authenticated-orcid":false,"given":"Elaine","family":"Zosa","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3067-9806","authenticated-orcid":false,"given":"Stephen","family":"Mutuvi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mark","family":"Granroth-Wilding","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6160-3356","authenticated-orcid":false,"given":"Antoine","family":"Doucet","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,11,30]]},"reference":[{"key":"30_CR1","doi-asserted-by":"crossref","unstructured":"Batmanghelich, K., Saeedi, A., Narasimhan, K., Gershman, S.: Nonparametric spherical topic modeling with word embeddings. In: Proceedings of the Conference. Association for Computational Linguistics. Meeting, vol. 2016, p. 537. NIH Public Access (2016)","DOI":"10.18653\/v1\/P16-2087"},{"issue":"Jan","key":"30_CR2","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei, D.M., Ng, A.Y., Jordan, M.I.: Latent dirichlet allocation. J. Mach. Learn. Res. 3(Jan), 993\u20131022 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"30_CR3","unstructured":"Blevins, C.: Topic modeling Martha Ballard\u2019s diary. Cameron Blevins (2010)"},{"key":"30_CR4","doi-asserted-by":"crossref","unstructured":"Das, R., Zaheer, M., Dyer, C.: Gaussian LDA for topic models with word embeddings. In: Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 795\u2013804 (2015)","DOI":"10.3115\/v1\/P15-1077"},{"key":"30_CR5","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1162\/tacl_a_00325","volume":"8","author":"AB Dieng","year":"2020","unstructured":"Dieng, A.B., Ruiz, F.J., Blei, D.M.: Topic modeling in embedding spaces. Trans. Assoc. Comput. Linguist. 8, 439\u2013453 (2020)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"30_CR6","doi-asserted-by":"crossref","unstructured":"Evershed, J., Fitch, K.: Correcting noisy OCR: context beats confusion. In: Proceedings of the First International Conference on Digital Access to Textual Cultural Heritage, pp. 45\u201351 (2014)","DOI":"10.1145\/2595188.2595200"},{"issue":"4","key":"30_CR7","doi-asserted-by":"publisher","first-page":"825","DOI":"10.1093\/llc\/fqz024","volume":"34","author":"MJ Hill","year":"2019","unstructured":"Hill, M.J., Hengchen, S.: Quantifying the impact of dirty OCR on historical text analysis: eighteenth century collections online as a case study. Digit. Scholarsh. Humanit. 34(4), 825\u2013843 (2019)","journal-title":"Digit. Scholarsh. Humanit."},{"issue":"Apr","key":"30_CR8","first-page":"361","volume":"5","author":"DD Lewis","year":"2004","unstructured":"Lewis, D.D., Yang, Y., Rose, T.G., Li, F.: Rcv1: a new benchmark collection for text categorization research. J. Mach. Learn. Res. 5(Apr), 361\u2013397 (2004)","journal-title":"J. Mach. Learn. Res."},{"key":"30_CR9","unstructured":"Marjanen, J., Zosa, E., Hengchen, S., Pivovarova, L., Tolonen, M.: Topic modelling discourse dynamics in historical newspapers. arXiv preprint arXiv:2011.10428 (2020)"},{"key":"30_CR10","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)"},{"issue":"1","key":"30_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2160165.2160168","volume":"5","author":"D Mimno","year":"2012","unstructured":"Mimno, D.: Computational historiography: data mining in a century of classics journals. J. Comput. Cult. Herit. (JOCCH) 5(1), 1\u201319 (2012)","journal-title":"J. Comput. Cult. Herit. (JOCCH)"},{"key":"30_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-04257-8_1","volume-title":"Maturity and Innovation in Digital Libraries","author":"S Mutuvi","year":"2018","unstructured":"Mutuvi, S., Doucet, A., Odeo, M., Jatowt, A.: Evaluating the impact of OCR errors on topic modeling. In: Dobreva, M., Hinze, A., \u017dumer, M. (eds.) ICADL 2018. LNCS, vol. 11279, pp. 3\u201314. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-04257-8_1"},{"key":"30_CR13","doi-asserted-by":"crossref","unstructured":"Nguyen, T.T.H., Jatowt, A., Coustaty, M., Nguyen, N.V., Doucet, A.: Deep statistical analysis of OCR errors for effective post-OCR processing. In: 2019 ACM\/IEEE Joint Conference on Digital Libraries (JCDL), pp. 29\u201338. IEEE (2019)","DOI":"10.1109\/JCDL.2019.00015"},{"key":"30_CR14","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.D.: Glove: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"30_CR15","doi-asserted-by":"crossref","unstructured":"R\u00f6der, M., Both, A., Hinneburg, A.: Exploring the space of topic coherence measures. In: Proceedings of the Eighth ACM International Conference on Web Search and Data Mining, pp. 399\u2013408 (2015)","DOI":"10.1145\/2684822.2685324"},{"key":"30_CR16","doi-asserted-by":"crossref","unstructured":"Viola, L., Verheul, J.: Mining ethnicity: discourse-driven topic modelling of immigrant discourses in the USA, 1898\u20131920. Digital Scholarship in the Humanities (2019)","DOI":"10.1093\/llc\/fqz068"},{"key":"30_CR17","unstructured":"Walker, D., Lund, W.B., Ringger, E.: Evaluating models of latent document semantics in the presence of OCR errors. In: Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing, pp. 240\u2013250 (2010)"},{"key":"30_CR18","unstructured":"Yang, T.I., Torget, A., Mihalcea, R.: Topic modeling on historical newspapers. In: Proceedings of the 5th ACL-HLT Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities, pp. 96\u2013104 (2011)"}],"container-title":["Lecture Notes in Computer Science","Towards Open and Trustworthy Digital Societies"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-91669-5_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T15:59:02Z","timestamp":1709827142000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-91669-5_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030916688","9783030916695"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-91669-5_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"30 November 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICADL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Asian Digital Libraries","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 December 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icadl2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icadl.net\/icadl2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"87","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"14","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"20% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.01","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.61","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}