{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:14:55Z","timestamp":1750220095214,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T00:00:00Z","timestamp":1656288000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Science Foundation","award":["ACI-1548562"],"award-info":[{"award-number":["ACI-1548562"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,27]]},"DOI":"10.1145\/3539781.3539795","type":"proceedings-article","created":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T16:23:27Z","timestamp":1657643007000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Toward a big data analysis system for historical newspaper collections research"],"prefix":"10.1145","author":[{"given":"Sandeep Puthanveetil","family":"Satheesan","sequence":"first","affiliation":[{"name":"University of Illinois at Urbana-Champaign"}]},{"family":"Bhavya","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign"}]},{"given":"Adam","family":"Davies","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign"}]},{"given":"Alan B.","family":"Craig","sequence":"additional","affiliation":[{"name":"Discovery Environment"}]},{"given":"Yu","family":"Zhang","sequence":"additional","affiliation":[{"name":"California State University"}]},{"given":"ChengXiang","family":"Zhai","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign"}]}],"member":"320","published-online":{"date-parts":[[2022,7,12]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. Apache Airflow. https:\/\/airflow.apache.org\/  [n.d.]. Apache Airflow. https:\/\/airflow.apache.org\/"},{"key":"e_1_3_2_1_2_1","unstructured":"[n.d.]. Chronicling America historic American newspapers. https:\/\/lccn.loc.gov\/2007618519  [n.d.]. Chronicling America historic American newspapers. https:\/\/lccn.loc.gov\/2007618519"},{"key":"e_1_3_2_1_3_1","unstructured":"[n. d.]. Elasticsearch: The Official Distributed Search & Analytics Engine. https:\/\/www.elastic.co\/\/elasticsearch  [n. d.]. Elasticsearch: The Official Distributed Search & Analytics Engine. https:\/\/www.elastic.co\/\/elasticsearch"},{"key":"e_1_3_2_1_4_1","unstructured":"[n. d.]. Improving the quality of the output. https:\/\/tesseract-ocr.github.io\/tessdoc\/ImproveQuality.html  [n. d.]. Improving the quality of the output. https:\/\/tesseract-ocr.github.io\/tessdoc\/ImproveQuality.html"},{"key":"e_1_3_2_1_5_1","unstructured":"[n. d.]. Kibana: Explore Visualize Discover Data. https:\/\/www.elastic.co\/kibana  [n. d.]. Kibana: Explore Visualize Discover Data. https:\/\/www.elastic.co\/kibana"},{"volume-title":"d.]. POLICE OF THE MKTROPOLIS. (Hansard","year":"1817","key":"e_1_3_2_1_6_1","unstructured":"[n. d.]. POLICE OF THE MKTROPOLIS. (Hansard , 7 July 1817 ). https:\/\/api.parliament.uk\/historic-hansard\/commons\/1817\/jul\/07\/police-of-the-mktropolis [n. d.]. POLICE OF THE MKTROPOLIS. (Hansard, 7 July 1817). https:\/\/api.parliament.uk\/historic-hansard\/commons\/1817\/jul\/07\/police-of-the-mktropolis"},{"key":"e_1_3_2_1_7_1","unstructured":"[n. d.]. ProQuest Historical NewspapersTM. https:\/\/about.proquest.com\/products-services\/pq-hist-news.html  [n. d.]. ProQuest Historical Newspapers TM . https:\/\/about.proquest.com\/products-services\/pq-hist-news.html"},{"key":"e_1_3_2_1_8_1","unstructured":"[n. d.]. Python Client for Google Cloud Vision --- google-cloud-vision documentation. https:\/\/googleapis.dev\/python\/vision\/latest\/index.html  [n. d.]. Python Client for Google Cloud Vision --- google-cloud-vision documentation. https:\/\/googleapis.dev\/python\/vision\/latest\/index.html"},{"key":"e_1_3_2_1_9_1","unstructured":"[n. d.]. The Valley of the Shadow: Two Communities in the American Civil War. https:\/\/valley.lib.virginia.edu\/  [n. d.]. The Valley of the Shadow: Two Communities in the American Civil War. https:\/\/valley.lib.virginia.edu\/"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.2307\/2337836"},{"key":"e_1_3_2_1_11_1","unstructured":"2021. googleapis\/python-vision. https:\/\/github.com\/googleapis\/python-vision original-date: 2019-12-10T00:10:28Z.  2021. googleapis\/python-vision. https:\/\/github.com\/googleapis\/python-vision original-date: 2019-12-10T00:10:28Z."},{"key":"e_1_3_2_1_12_1","unstructured":"Waleed Abdulla. 2017. Mask R-CNN for object detection and instance segmentation on Keras and TensorFlow. https:\/\/github.com\/matterport\/Mask_RCNN  Waleed Abdulla. 2017. Mask R-CNN for object detection and instance segmentation on Keras and TensorFlow. https:\/\/github.com\/matterport\/Mask_RCNN"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1177\/009365084011003001"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2019.00223"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307681.3325400"},{"key":"e_1_3_2_1_16_1","volume-title":"Charles LA Clarke, and Gordon V Cormack","author":"Buttcher Stefan","year":"2016","unstructured":"Stefan Buttcher , Charles LA Clarke, and Gordon V Cormack . 2016 . Information retrieval: Implementing and evaluating search engines. Mit Press . Stefan Buttcher, Charles LA Clarke, and Gordon V Cormack. 2016. Information retrieval: Implementing and evaluating search engines. Mit Press."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2017.232"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF02701600"},{"key":"e_1_3_2_1_19_1","volume-title":"arXiv:1703.06870 [cs] (Jan","author":"He Kaiming","year":"2018","unstructured":"Kaiming He , Georgia Gkioxari , Piotr Doll\u00e1r , and Ross Girshick . 2018. Mask R-CNN. arXiv:1703.06870 [cs] (Jan . 2018 ). http:\/\/arxiv.org\/abs\/1703.06870 arXiv:1703.06870. Kaiming He, Georgia Gkioxari, Piotr Doll\u00e1r, and Ross Girshick. 2018. Mask R-CNN. arXiv:1703.06870 [cs] (Jan. 2018). http:\/\/arxiv.org\/abs\/1703.06870 arXiv:1703.06870."},{"key":"e_1_3_2_1_20_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep Residual Learning for Image Recognition. 770--778. https:\/\/openaccess.thecvf.com\/content_cvpr_2016\/html\/He_Deep_Residual_Learning_CVPR_2016_paper.html  Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep Residual Learning for Image Recognition. 770--778. https:\/\/openaccess.thecvf.com\/content_cvpr_2016\/html\/He_Deep_Residual_Learning_CVPR_2016_paper.html"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1177\/110330889300100301"},{"key":"e_1_3_2_1_22_1","volume-title":"NeuSpell: A Neural Spelling Correction Toolkit. arXiv:2010.11085 [cs] (Oct","author":"Jayanthi Sai Muralidhar","year":"2020","unstructured":"Sai Muralidhar Jayanthi , Danish Pruthi , and Graham Neubig . 2020. NeuSpell: A Neural Spelling Correction Toolkit. arXiv:2010.11085 [cs] (Oct . 2020 ). http:\/\/arxiv.org\/abs\/2010.11085 arXiv: 2010.11085. Sai Muralidhar Jayanthi, Danish Pruthi, and Graham Neubig. 2020. NeuSpell: A Neural Spelling Correction Toolkit. arXiv:2010.11085 [cs] (Oct. 2020). http:\/\/arxiv.org\/abs\/2010.11085 arXiv: 2010.11085."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2806416.2806474"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3311790.3396649"},{"key":"e_1_3_2_1_25_1","volume-title":"Weld","author":"Germain Lee Benjamin Charles","year":"2020","unstructured":"Benjamin Charles Germain Lee , Jaime Mears , Eileen Jakeway , Meghan Ferriter , Chris Adams , Nathan Yarasavage , Deborah Thomas , Kate Zwaard , and Daniel S . Weld . 2020 . The Newspaper Navigator Dataset: Extracting And Analyzing Visual Content from 16 Million Historic Newspaper Pages in Chronicling America . arXiv 2005.01583 [cs] (May 2020). http:\/\/arxiv.org\/abs\/2005.01583 arXiv: 2005.01583. Benjamin Charles Germain Lee, Jaime Mears, Eileen Jakeway, Meghan Ferriter, Chris Adams, Nathan Yarasavage, Deborah Thomas, Kate Zwaard, and Daniel S. Weld. 2020. The Newspaper Navigator Dataset: Extracting And Analyzing Visual Content from 16 Million Historic Newspaper Pages in Chronicling America. arXiv 2005.01583 [cs] (May 2020). http:\/\/arxiv.org\/abs\/2005.01583 arXiv: 2005.01583."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_27_1","unstructured":"Walter Lippmann. 1946. Public Opinion. Transaction Publishers. Google-Books-ID: YhXLOVc6BsoC.  Walter Lippmann. 1946. Public Opinion. Transaction Publishers. Google-Books-ID: YhXLOVc6BsoC."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219104.3219159"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2110486.2110490"},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the 3rd DH Benelux Conference (DH Benelux","author":"Martinez-Ortiz Carlos","year":"2016","unstructured":"Carlos Martinez-Ortiz , Tom Kenter , Melvin Wevers , Pim Huijnen , Jaap Verheul , and Joris van Eijnatten . 2016 . ShiCo: A Visualization Tool for Shifting Concepts Through Time . In Proceedings of the 3rd DH Benelux Conference (DH Benelux 2016). 1. Carlos Martinez-Ortiz, Tom Kenter, Melvin Wevers, Pim Huijnen, Jaap Verheul, and Joris van Eijnatten. 2016. ShiCo: A Visualization Tool for Shifting Concepts Through Time. In Proceedings of the 3rd DH Benelux Conference (DH Benelux 2016). 1."},{"volume-title":"Beyond topic-based representations for text mining. Ph. D. Dissertation","author":"Massung Sean Alexander","key":"e_1_3_2_1_31_1","unstructured":"Sean Alexander Massung . 2017. Beyond topic-based representations for text mining. Ph. D. Dissertation . University of Illinois at Urbana-Champaign. Sean Alexander Massung. 2017. Beyond topic-based representations for text mining. Ph. D. Dissertation. University of Illinois at Urbana-Champaign."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2017.75"},{"volume-title":"Youth Justice: Critical Readings","author":"Newburn Tim","key":"e_1_3_2_1_33_1","unstructured":"Tim Newburn . 2002. The contemporary politics of youth crime prevention . In Youth Justice: Critical Readings , John Muncie, Gordon Hughes, and Eugene McLaughlin (Eds.). Sage Publications, London , 452--463. Num Pages: 476. Tim Newburn. 2002. The contemporary politics of youth crime prevention. In Youth Justice: Critical Readings, John Muncie, Gordon Hughes, and Eugene McLaughlin (Eds.). Sage Publications, London, 452--463. Num Pages: 476."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2015.7363791"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1086\/218445"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_37_1","unstructured":"Sandeep Puthanveetil Satheesan. [n. d.]. draw-text-boxes-alto-viz. https:\/\/opensource.ncsa.illinois.edu\/bitbucket\/projects\/JUDEL\/repos\/draw-text-boxes-alto-viz\/  Sandeep Puthanveetil Satheesan. [n. d.]. draw-text-boxes-alto-viz. https:\/\/opensource.ncsa.illinois.edu\/bitbucket\/projects\/JUDEL\/repos\/draw-text-boxes-alto-viz\/"},{"key":"e_1_3_2_1_38_1","unstructured":"Sandeep Puthanveetil Satheesan. [n. d.]. loc-ca-search-download-app. https:\/\/opensource.ncsa.illinois.edu\/bitbucket\/projects\/JUDEL\/repos\/loc-ca-search-download-app\/  Sandeep Puthanveetil Satheesan. [n. d.]. loc-ca-search-download-app. https:\/\/opensource.ncsa.illinois.edu\/bitbucket\/projects\/JUDEL\/repos\/loc-ca-search-download-app\/"},{"key":"e_1_3_2_1_39_1","unstructured":"Sandeep Puthanveetil Satheesan. [n. d.]. sandeep-ps\/Mask_RCNN. https:\/\/github.com\/sandeep-ps\/Mask_RCNN  Sandeep Puthanveetil Satheesan. [n. d.]. sandeep-ps\/Mask_RCNN. https:\/\/github.com\/sandeep-ps\/Mask_RCNN"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.2307\/1140574"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219104.3219132"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/eScience.2019.00094"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.2517-6161.1974.tb00994.x_eprint"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1517"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2014.80"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.586"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-32243-8_1"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972795.96"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376235"}],"event":{"name":"PASC '22: Platform for Advanced Scientific Computing Conference","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","CSCS Swiss National Supercomputing Centre"],"location":"Basel Switzerland","acronym":"PASC '22"},"container-title":["Proceedings of the Platform for Advanced Scientific Computing Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539781.3539795","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539781.3539795","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:10:02Z","timestamp":1750183802000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539781.3539795"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,27]]},"references-count":50,"alternative-id":["10.1145\/3539781.3539795","10.1145\/3539781"],"URL":"https:\/\/doi.org\/10.1145\/3539781.3539795","relation":{},"subject":[],"published":{"date-parts":[[2022,6,27]]},"assertion":[{"value":"2022-07-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}