{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:23:19Z","timestamp":1750220599480,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":10,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"HathiTrust Research Center"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,8]]},"DOI":"10.1145\/3383583.3398621","type":"proceedings-article","created":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T04:18:32Z","timestamp":1596255512000},"page":"405-408","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Improving Digital Libraries' Provision of Digital Humanities Datasets: A Case Study of HTRC Literature Dataset"],"prefix":"10.1145","author":[{"given":"Yuerong","family":"Hu","sequence":"first","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Champaign, IL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming","family":"Jiang","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Champaign, IL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ted","family":"Underwood","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Champaign, IL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J. Stephen","family":"Downie","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Champaign, IL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,8]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"HathiTrust Research Center. 2015. Genre-specific word counts for 178 381 volumes from the HathiTrust Digital Library [v.0.1] . https:\/\/wiki.htrc.illinois.edu\/display\/COM\/Word+Frequencies+in+English-Language+Literature%2C+ 1700--1922  HathiTrust Research Center. 2015. Genre-specific word counts for 178 381 volumes from the HathiTrust Digital Library [v.0.1] . https:\/\/wiki.htrc.illinois.edu\/display\/COM\/Word+Frequencies+in+English-Language+Literature%2C+ 1700--1922"},{"key":"e_1_3_2_2_2_1","unstructured":"Adam Cohen. [n.d.]. FuzzyWuzzy Project Description. https:\/\/pypi.org\/project\/fuzzywuzzy\/  Adam Cohen. [n.d.]. FuzzyWuzzy Project Description. https:\/\/pypi.org\/project\/fuzzywuzzy\/"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1002\/meet.2014.14505101047"},{"key":"e_1_3_2_2_4_1","volume-title":"Proceedings of the 2019 ACM\/IEEE Joint Conference on Digital Libraries. IEEE, 29--38","author":"Jatowt Adam","year":"2019","unstructured":"Adam Jatowt , Mickael Coustaty , Nhu-Van Nguyen , Antoine Doucet , 2019 . Deep Statistical Analysis of OCR Errors for Effective Post-OCR Processing . In Proceedings of the 2019 ACM\/IEEE Joint Conference on Digital Libraries. IEEE, 29--38 . Adam Jatowt, Mickael Coustaty, Nhu-Van Nguyen, Antoine Doucet, et al. 2019. Deep Statistical Analysis of OCR Errors for Effective Post-OCR Processing. In Proceedings of the 2019 ACM\/IEEE Joint Conference on Digital Libraries. IEEE, 29--38."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/JCDL.2019.00044"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197026.3203886"},{"key":"e_1_3_2_2_7_1","volume-title":"Characterizing the Google Books corpus: Strong limits to inferences of socio-cultural and linguistic evolution. PloS one","author":"Pechenick Eitan Adam","year":"2015","unstructured":"Eitan Adam Pechenick , Christopher M Danforth , and Peter Sheridan Dodds . 2015. Characterizing the Google Books corpus: Strong limits to inferences of socio-cultural and linguistic evolution. PloS one , Vol. 10 , 10 ( 2015 ). Eitan Adam Pechenick, Christopher M Danforth, and Peter Sheridan Dodds. 2015. Characterizing the Google Books corpus: Strong limits to inferences of socio-cultural and linguistic evolution. PloS one, Vol. 10, 10 (2015)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1086\/665933"},{"volume-title":"Distant horizons: Digital evidence and literary change","author":"Underwood Ted","key":"e_1_3_2_2_9_1","unstructured":"Ted Underwood . 2019. Distant horizons: Digital evidence and literary change . University of Chicago Press . Ted Underwood. 2019. Distant horizons: Digital evidence and literary change .University of Chicago Press."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2608029.2608031"}],"event":{"name":"JCDL '20: The ACM\/IEEE Joint Conference on Digital Libraries in 2020","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval","IEEE Institute of Electrical and Electronics Engineers"],"location":"Virtual Event China","acronym":"JCDL '20"},"container-title":["Proceedings of the ACM\/IEEE Joint Conference on Digital Libraries in 2020"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3383583.3398621","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3383583.3398621","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:31:56Z","timestamp":1750195916000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3383583.3398621"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8]]},"references-count":10,"alternative-id":["10.1145\/3383583.3398621","10.1145\/3383583"],"URL":"https:\/\/doi.org\/10.1145\/3383583.3398621","relation":{},"subject":[],"published":{"date-parts":[[2020,8]]},"assertion":[{"value":"2020-08-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}