{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T15:10:49Z","timestamp":1774451449894,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2009,7,23]],"date-time":"2009-07-23T00:00:00Z","timestamp":1248307200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2009,7,23]]},"DOI":"10.1145\/1568296.1568315","type":"proceedings-article","created":{"date-parts":[[2009,7,28]],"date-time":"2009-07-28T12:45:28Z","timestamp":1248785128000},"page":"115-122","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":50,"title":["A survey of types of text noise and techniques to handle noisy text"],"prefix":"10.1145","author":[{"given":"L. Venkata","family":"Subramaniam","sequence":"first","affiliation":[{"name":"IBM India Research Lab, New Delhi, India"}]},{"given":"Shourya","family":"Roy","sequence":"additional","affiliation":[{"name":"Xerox India Innovation Hub, Chennai, India"}]},{"given":"Tanveer A.","family":"Faruquie","sequence":"additional","affiliation":[{"name":"IBM India Research Lab, New Delhi, India"}]},{"given":"Sumit","family":"Negi","sequence":"additional","affiliation":[{"name":"IBM India Research Lab, New Delhi, India"}]}],"member":"320","published-online":{"date-parts":[[2009,7,23]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2007.21"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.3115\/1220575.1220695"},{"key":"e_1_3_2_1_3_1","first-page":"65","volume-title":"Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization","author":"Banerjee S.","year":"2005","unstructured":"S. Banerjee and A. Lavie . METEOR: An automatic metric for MT evaluation with improved correlation with human judgments . In Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization , pages 65 -- 72 , June 2005 . S. Banerjee and A. Lavie. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization, pages 65--72, June 2005."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1006\/cviu.1998.0687"},{"key":"e_1_3_2_1_5_1","first-page":"2771","volume-title":"Proc. EUROSPEECH","author":"Berdy U.","year":"1997","unstructured":"U. Berdy , C. Uhrik , and W. Ward . Confidence metrics based on n-gram language model backoff behaviors . In Proc. EUROSPEECH , pages 2771 -- 2774 , 1997 . U. Berdy, C. Uhrik, and W. Ward. Confidence metrics based on n-gram language model backoff behaviors. In Proc. EUROSPEECH, pages 2771--2774, 1997."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.5555\/972470.972474"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1458082.1458240"},{"key":"e_1_3_2_1_8_1","first-page":"181","volume-title":"EMNLP '07: Proceedings of 2007 Conference on Empirical Methods in Natural Language Processing","author":"Chen Q.","year":"2007","unstructured":"Q. Chen , M. Li , and M. Zhou . Improving query spelling correction using web search results . In EMNLP '07: Proceedings of 2007 Conference on Empirical Methods in Natural Language Processing , pages 181 -- 189 , 2007 . Q. Chen, M. Li, and M. Zhou. Improving query spelling correction using web search results. In EMNLP '07: Proceedings of 2007 Conference on Empirical Methods in Natural Language Processing, pages 181--189, 2007."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-007-0054-0"},{"key":"e_1_3_2_1_10_1","first-page":"293","volume-title":"EMNLP '04: Proceedings of 2004 Conference on Empirical Methods in Natural Language Processing","author":"Cucerzan S.","year":"2004","unstructured":"S. Cucerzan and E. Brill . Spelling correction as an iterative process that exploits the collective knowledge of web users . In EMNLP '04: Proceedings of 2004 Conference on Empirical Methods in Natural Language Processing , pages 293 -- 300 , 2004 . S. Cucerzan and E. Brill. Spelling correction as an iterative process that exploits the collective knowledge of web users. In EMNLP '04: Proceedings of 2004 Conference on Empirical Methods in Natural Language Processing, pages 293--300, 2004."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2003.1198860"},{"key":"e_1_3_2_1_12_1","first-page":"1713","volume-title":"7th International Conference on Spoken Language Processing (ICSLP-2002","author":"Imamura K.","year":"2002","unstructured":"K. Imamura and E. Sumita . Bilingual corpus cleaning focusing on translation literality . In 7th International Conference on Spoken Language Processing (ICSLP-2002 , pages 1713 -- 1716 , 2002 . K. Imamura and E. Sumita. Bilingual corpus cleaning focusing on translation literality. In 7th International Conference on Spoken Language Processing (ICSLP-2002, pages 1713--1716, 2002."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.3115\/1067807.1067829"},{"key":"e_1_3_2_1_14_1","first-page":"111","volume-title":"Proceedings of the Symposium on Document Image Understanding Technology","author":"Jing H.","year":"2003","unstructured":"H. Jing , D. Lopresti , and C. Shih . Summarizing noisy documents . In Proceedings of the Symposium on Document Image Understanding Technology , pages 111 -- 119 , 2003 . H. Jing, D. Lopresti, and C. Shih. Summarizing noisy documents. In Proceedings of the Symposium on Document Image Understanding Technology, pages 111--119, 2003."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1998.674406"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","first-page":"827","DOI":"10.21437\/Eurospeech.1997-281","volume-title":"in Proceedings of EuroSpeech","author":"Kemp T.","year":"1997","unstructured":"T. Kemp and T. Schaaf . Estimating confidence using word lattices . In in Proceedings of EuroSpeech , pages 827 -- 830 , 1997 . T. Kemp and T. Schaaf. Estimating confidence using word lattices. In in Proceedings of EuroSpeech, pages 827--830, 1997."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/11428817_24"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/1599081.1599137"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/1690219.1690266"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/146370.146380"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/317328.317340"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.3115\/1220175.1220304"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390749.1390753"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/1458082.1458145"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.3115\/974147.974191"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.3115\/1072133.1072186"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_28_1","volume-title":"WWW '03: Proceedings of 12th International World Wide Web Conference","author":"Risvik K. M.","year":"2003","unstructured":"K. M. Risvik , T. Mikolajewski , and P. Boros . Query segmentation for web search . In WWW '03: Proceedings of 12th International World Wide Web Conference , 2003 . K. M. Risvik, T. Mikolajewski, and P. Boros. Query segmentation for web search. In WWW '03: Proceedings of 12th International World Wide Web Conference, 2003."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.3115\/1220175.1220268"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.3115\/1220575.1220581"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2009.41"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/0306-4573(95)00058-5"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.5555\/1018428.1020771"}],"event":{"name":"AND '09: Third Workshop on Analytics for Noisy Unstructured Text Data","location":"Barcelona Spain","acronym":"AND '09"},"container-title":["Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1568296.1568315","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1568296.1568315","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T13:38:49Z","timestamp":1750253929000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1568296.1568315"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,7,23]]},"references-count":33,"alternative-id":["10.1145\/1568296.1568315","10.1145\/1568296"],"URL":"https:\/\/doi.org\/10.1145\/1568296.1568315","relation":{},"subject":[],"published":{"date-parts":[[2009,7,23]]},"assertion":[{"value":"2009-07-23","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}