{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T05:28:01Z","timestamp":1743053281831,"version":"3.40.3"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030863234"},{"type":"electronic","value":"9783030863241"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86324-1_1","type":"book-chapter","created":{"date-parts":[[2021,9,6]],"date-time":"2021-09-06T19:03:28Z","timestamp":1630955008000},"page":"3-16","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["FETD$$^{2}$$: A Framework for Enabling Textual Data Denoising via Robust Contextual Embeddings"],"prefix":"10.1007","author":[{"family":"Govind","sequence":"first","affiliation":[]},{"given":"C\u00e9line","family":"Alec","sequence":"additional","affiliation":[]},{"given":"Jean-Luc","family":"Manguin","sequence":"additional","affiliation":[]},{"given":"Marc","family":"Spaniol","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,9,7]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Astudillo, R., Amir, S., Ling, W., Silva, M., Trancoso, I.: Learning word Representations from scarce and noisy data with embedding subspaces. In: Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 1074\u20131084. Association for Computational Linguistics, Beijing, China, July 2015. https:\/\/www.aclweb.org\/anthology\/P15-1104","DOI":"10.3115\/v1\/P15-1104"},{"key":"1_CR2","unstructured":"Belinkov, Y., Bisk, Y.: Synthetic and natural noise both break neural machine translation. In: International Conference on Learning Representations (2018)"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Boukkouri, H.E., Ferret, O., Lavergne, T., Noji, H., Zweigenbaum, P., Tsujii, J.: CharacterBERT: Reconciling ELMo and BERT for Word-Level Open-Vocabulary Representations From Characters (2020)","DOI":"10.18653\/v1\/2020.coling-main.609"},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Chiron, G., Doucet, A., Coustaty, M., Moreux, J.: ICDAR2017 competition on post-OCR text correction. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR). vol. 01, pp. 1423\u20131428, November 2017. https:\/\/doi.org\/10.1109\/ICDAR.2017.232","DOI":"10.1109\/ICDAR.2017.232"},{"key":"1_CR5","unstructured":"Clark, K., Luong, M.T., Le, Q.V., Manning, C.D.: ELECTRA: pre-training text encoders as discriminators rather than generators. In: ICLR (2020). https:\/\/openreview.net\/pdf?id=r1xMH1BtvB"},{"key":"1_CR6","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. CoRR abs\/1810.04805 (2018)"},{"key":"1_CR7","unstructured":"Edizel, B., Piktus, A., Bojanowski, P., Ferreira, R., Grave, E., Silvestri, F.: Misspelling oblivious word embeddings. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT 2019), Minneapolis, MN, USA, June 2\u20137 2019, Vol. 1 (Long and Short Papers), pp. 3226\u20133234 (2019). https:\/\/aclweb.org\/anthology\/papers\/N\/N19\/N19-1326\/"},{"key":"1_CR8","unstructured":"Eger, S., et al.: Text processing like humans do: visually attacking and shielding NLP systems. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, vol. 1 (Long and Short Papers), pp. 1634\u20131647. Association for Computational Linguistics, Minneapolis, Minnesota, June 2019. https:\/\/www.aclweb.org\/anthology\/N19-1165"},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997), http:\/\/dx.doi.org\/10.1162\/neco.1997.9.8.1735","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"1_CR10","unstructured":"J\u00f3zefowicz, R., Vinyals, O., Schuster, M., Shazeer, N., Wu, Y.: Exploring the limits of language modeling. CoRR abs\/1602.02410 (2016). http:\/\/arxiv.org\/abs\/1602.02410"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Kim, Y., Jernite, Y., Sontag, D., Rush, A.M.: Character-aware neural language models. In: Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence (AAAI 2016), pp. 2741\u20132749. AAAI Press (2016)","DOI":"10.1609\/aaai.v30i1.10362"},{"key":"1_CR12","doi-asserted-by":"publisher","unstructured":"Kumar, A., Makhija, P., Gupta, A.: noisy text data: achilles\u2019 heel of BERT. In: Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020), pp. 16\u201321. Association for Computational Linguistics, November 2020. https:\/\/doi.org\/10.18653\/v1\/2020.wnut-1.3, https:\/\/www.aclweb.org\/anthology\/2020.wnut-1.3","DOI":"10.18653\/v1\/2020.wnut-1.3"},{"key":"1_CR13","unstructured":"Lan, Z., Chen, M., Goodman, S., Gimpel, K., Sharma, P., Soricut, R.: ALBERT: a lite BERT for self-supervised learning of language representations. In: International Conference on Learning Representations (2020). https:\/\/openreview.net\/forum?id=H1eA7AEtvS"},{"key":"1_CR14","unstructured":"Larson, C., Lahlou, T., Mingels, D., Kulis, Z., Mueller, E.: Telephonetic: making neural language models robust to ASR and semantic noise. ArXiv abs\/1906.05678 (2019)"},{"key":"1_CR15","doi-asserted-by":"crossref","unstructured":"Linhares Pontes, E., Hamdi, A., Sidere, N., Doucet, A.: Impact of OCR quality on named entity linking. In: Proceedings of 21st International Conference on Asia-Pacific Digital Libraries (ICADL 2019) (2019)","DOI":"10.1007\/978-3-030-34058-2_11"},{"key":"1_CR16","doi-asserted-by":"crossref","unstructured":"Liza, F.F., Grzes, M.: Improving language modelling with noise-contrastive estimation. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.11967"},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Malykh, V., Logacheva, V., Khakhulin, T.: Robust word vectors: context-informed embeddings for noisy texts. In: Proceedings of the 2018 EMNLP Workshop W-NUT: The 4th Workshop on Noisy User-generated Text, pp. 54\u201363. Association for Computational Linguistics, Brussels, Belgium, November 2018. https:\/\/www.aclweb.org\/anthology\/W18-6108","DOI":"10.18653\/v1\/W18-6108"},{"key":"1_CR18","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S., Dean, J.: Distributed representations of words and phrases and their compositionality. In: Advances in Neural Information Processing Systems, pp. 3111\u20133119 (2013)"},{"key":"1_CR19","doi-asserted-by":"crossref","unstructured":"Nayak, A., Timmapathini, H., Ponnalagu, K., Venkoparao, V.G.: Domain adaptation challenges of BERT in tokenization and sub-word representations of out-of-vocabulary words. In: Rogers, A., Sedoc, J., Rumshisky, A. (eds.) Proceedings of the 1st Workshop on Insights from Negative Results in NLP, Insights 2020, pp. 1\u20135. ACL (2020)","DOI":"10.18653\/v1\/2020.insights-1.1"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.D.: GloVe: global vectors for word representation. In: Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"1_CR21","doi-asserted-by":"crossref","unstructured":"Peters, M.E., Neumann, M., Iyyer, M., Gardner, M., Clark, C., Lee, K., Zettlemoyer, L.: Deep contextualized word representations. In: Proceedings of NAACL (2018)","DOI":"10.18653\/v1\/N18-1202"},{"key":"1_CR22","doi-asserted-by":"crossref","unstructured":"Ren, S., Deng, Y., He, K., Che, W.: Generating natural language adversarial examples through probability weighted word saliency. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 1085\u20131097. Association for Computational Linguistics, Florence, Italy, July 2019. https:\/\/www.aclweb.org\/anthology\/P19-1103","DOI":"10.18653\/v1\/P19-1103"},{"key":"1_CR23","doi-asserted-by":"publisher","unstructured":"Subramaniam, L., Roy, S., Faruquie, T., Negi, S.: A survey of types of text noise and techniques to handle noisy text. In: ACM International Conference Proceeding Serie, pp. 115\u2013122, January 2009. https:\/\/doi.org\/10.1145\/1568296.1568315","DOI":"10.1145\/1568296.1568315"},{"key":"1_CR24","unstructured":"Sun, L., et al.: Adv-BERT: BERT is not robust on misspellings! Generating nature adversarial samples on BERT. arXiv preprint arXiv:2003.04985 (2020)"},{"key":"1_CR25","doi-asserted-by":"crossref","unstructured":"Sun, Y., Jiang, H.: Contextual text denoising with masked language model. In: Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019), pp. 286\u2013290. Association for Computational Linguistics, Hong Kong, China, November 2019","DOI":"10.18653\/v1\/D19-5537"},{"key":"1_CR26","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems 30, pp. 5998\u20136008. Curran Associates, Inc., Red Hook (2017)"},{"key":"1_CR27","unstructured":"Wang, W., Tang, B., Wang, R., Wang, L., Ye, A.: A survey on adversarial attacks and defenses in text. arXiv preprint arXiv:1902.07285 (2019)"},{"key":"1_CR28","doi-asserted-by":"crossref","unstructured":"Xiong, W., et al.: TweetQA: a social media focused question answering dataset. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/P19-1496"},{"key":"1_CR29","unstructured":"Yang, Z., Dai, Z., Yang, Y., Carbonell, J.G., Salakhutdinov, R., Le, Q.V.: XLNet: generalized autoregressive pretraining for language understanding. CoRR abs\/1906.08237 (2019). http:\/\/arxiv.org\/abs\/1906.08237"},{"key":"1_CR30","unstructured":"Zhang, W.E., Sheng, Q.Z., Alhazmi, A.A.F.: Generating textual adversarial examples for deep learning models: a survey. arXiv preprint arXiv:1901.06796 (2019)"}],"container-title":["Lecture Notes in Computer Science","Linking Theory and Practice of Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86324-1_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,8]],"date-time":"2023-01-08T18:38:11Z","timestamp":1673203091000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86324-1_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030863234","9783030863241"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86324-1_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"7 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TPDL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Theory and Practice of Digital Libraries","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tpdl2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.tpdl.eu\/tpdl2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"53","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.04","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}