{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:48:50Z","timestamp":1771951730166,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":14,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"H2020","award":["770299"],"award-info":[{"award-number":["770299"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,8]]},"DOI":"10.1145\/3383583.3398605","type":"proceedings-article","created":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T04:18:32Z","timestamp":1596255512000},"page":"333-336","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":28,"title":["Neural Machine Translation with BERT for Post-OCR Error Detection and Correction"],"prefix":"10.1145","author":[{"given":"Thi Tuyet Hai","family":"Nguyen","sequence":"first","affiliation":[{"name":"L3i, University of La Rochelle, La Rochelle, France"}]},{"given":"Adam","family":"Jatowt","sequence":"additional","affiliation":[{"name":"Kyoto University, Kyoto, Japan"}]},{"given":"Nhu-Van","family":"Nguyen","sequence":"additional","affiliation":[{"name":"L3i, University of La Rochelle, La Rochelle, France"}]},{"given":"Mickael","family":"Coustaty","sequence":"additional","affiliation":[{"name":"L3i, University of La Rochelle, La Rochelle, France"}]},{"given":"Antoine","family":"Doucet","sequence":"additional","affiliation":[{"name":"L3i, University of La Rochelle, La Rochelle, France"}]}],"member":"320","published-online":{"date-parts":[[2020,8]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Supervised OCR Error Detection and Correction Using Statistical and Neural Machine Translation Methods. Journal for Language Technology and Computational Linguistics","author":"Amrhein Chantal","year":"2018","unstructured":"Chantal Amrhein and Simon Clematide . 2018. Supervised OCR Error Detection and Correction Using Statistical and Neural Machine Translation Methods. Journal for Language Technology and Computational Linguistics ( 2018 ). Chantal Amrhein and Simon Clematide. 2018. Supervised OCR Error Detection and Correction Using Statistical and Neural Machine Translation Methods. Journal for Language Technology and Computational Linguistics (2018)."},{"key":"e_1_3_2_2_2_1","volume-title":"ICDAR2017 competition on post-OCR text correction. In 14th IAPR International Conference on Document Analysis and Recognition. IEEE, 1423--1428","author":"Chiron Guillaume","year":"2017","unstructured":"Guillaume Chiron , Antoine Doucet , Mickael Coustaty , and Jean-Philippe Moreux . 2017 . ICDAR2017 competition on post-OCR text correction. In 14th IAPR International Conference on Document Analysis and Recognition. IEEE, 1423--1428 . Guillaume Chiron, Antoine Doucet, Mickael Coustaty, and Jean-Philippe Moreux. 2017. ICDAR2017 competition on post-OCR text correction. In 14th IAPR International Conference on Document Analysis and Recognition. IEEE, 1423--1428."},{"key":"e_1_3_2_2_3_1","volume-title":"Bert: Pretraining of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2018 . Bert: Pretraining of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018). Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pretraining of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/2595188.2595200"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAS.2016.44"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-4012"},{"key":"e_1_3_2_2_7_1","unstructured":"Bryan McCann James Bradbury Caiming Xiong and Richard Socher. 2017. Learned in translation: Contextualized word vectors. In Advances in Neural Information Processing Systems. 6294--6305.  Bryan McCann James Bradbury Caiming Xiong and Richard Socher. 2017. Learned in translation: Contextualized word vectors. In Advances in Neural Information Processing Systems. 6294--6305."},{"key":"e_1_3_2_2_8_1","volume-title":"Adaptive Edit-Distance and Regression Approach for Post-OCR Text Correction. In 20th International Conference on Asia-Pacific Digital Libraries, ICADL","author":"Nguyen Tuyet-Hai","year":"2018","unstructured":"Thi- Tuyet-Hai Nguyen , Mickael Coustaty , Antoine Doucet , Adam Jatowt , and Nhu-Van Nguyen . 2018 . Adaptive Edit-Distance and Regression Approach for Post-OCR Text Correction. In 20th International Conference on Asia-Pacific Digital Libraries, ICADL 2018. 278--289. Thi-Tuyet-Hai Nguyen, Mickael Coustaty, Antoine Doucet, Adam Jatowt, and Nhu-Van Nguyen. 2018. Adaptive Edit-Distance and Regression Approach for Post-OCR Text Correction. In 20th International Conference on Asia-Pacific Digital Libraries, ICADL 2018. 278--289."},{"key":"e_1_3_2_2_9_1","volume-title":"Deep Statistical Analysis of OCR Errors for Effective Post-OCR Processing. In 19th ACM\/IEEE Joint Conf. on Digital Libraries. 29--38","author":"Nguyen Tuyet-Hai","year":"2019","unstructured":"Thi- Tuyet-Hai Nguyen , Adam Jatowt , Mickael Coustaty , Nhu-Van Nguyen , and Antoine Doucet . 2019 . Deep Statistical Analysis of OCR Errors for Effective Post-OCR Processing. In 19th ACM\/IEEE Joint Conf. on Digital Libraries. 29--38 . Thi-Tuyet-Hai Nguyen, Adam Jatowt, Mickael Coustaty, Nhu-Van Nguyen, and Antoine Doucet. 2019. Deep Statistical Analysis of OCR Errors for Effective Post-OCR Processing. In 19th ACM\/IEEE Joint Conf. on Digital Libraries. 29--38."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00145"},{"key":"e_1_3_2_2_11_1","volume-title":"ICDAR 2019 Competition on Post-OCR Text Correction. In 15th IAPR International Conference on Document Analysis and Recognition, ICDAR 2019.","author":"Rigaud Christophe","year":"2019","unstructured":"Christophe Rigaud , Antoine Doucet , Mickael Coustaty , and Jean-Philippe Moreux . 2019 . ICDAR 2019 Competition on Post-OCR Text Correction. In 15th IAPR International Conference on Document Analysis and Recognition, ICDAR 2019. Christophe Rigaud, Antoine Doucet, Mickael Coustaty, and Jean-Philippe Moreux. 2019. ICDAR 2019 Competition on Post-OCR Text Correction. In 15th IAPR International Conference on Document Analysis and Recognition, ICDAR 2019."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1288"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24592-8_19"},{"key":"e_1_3_2_2_14_1","unstructured":"Yonghui Wu Mike Schuster Zhifeng Chen etal 2016. Google's neural machine translation system: Bridging the gap between human and machine translation. arXiv preprint arXiv:1609.08144 (2016).  Yonghui Wu Mike Schuster Zhifeng Chen et al. 2016. Google's neural machine translation system: Bridging the gap between human and machine translation. arXiv preprint arXiv:1609.08144 (2016)."}],"event":{"name":"JCDL '20: The ACM\/IEEE Joint Conference on Digital Libraries in 2020","location":"Virtual Event China","acronym":"JCDL '20","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval","IEEE Institute of Electrical and Electronics Engineers"]},"container-title":["Proceedings of the ACM\/IEEE Joint Conference on Digital Libraries in 2020"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3383583.3398605","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3383583.3398605","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:31:56Z","timestamp":1750195916000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3383583.3398605"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8]]},"references-count":14,"alternative-id":["10.1145\/3383583.3398605","10.1145\/3383583"],"URL":"https:\/\/doi.org\/10.1145\/3383583.3398605","relation":{},"subject":[],"published":{"date-parts":[[2020,8]]},"assertion":[{"value":"2020-08-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}