{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T07:13:21Z","timestamp":1772954001223,"version":"3.50.1"},"reference-count":26,"publisher":"Elsevier BV","issue":"1-3","license":[{"start":{"date-parts":[[2002,12,1]],"date-time":"2002-12-01T00:00:00Z","timestamp":1038700800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["International Journal of Medical Informatics"],"published-print":{"date-parts":[[2002,12]]},"DOI":"10.1016\/s1386-5056(02)00057-6","type":"journal-article","created":{"date-parts":[[2002,12,3]],"date-time":"2002-12-03T11:38:12Z","timestamp":1038915492000},"page":"75-83","source":"Crossref","is-referenced-by-count":23,"title":["Evaluating and reducing the effect of data corruption when applying bag of words approaches to medical records"],"prefix":"10.1016","volume":"67","author":[{"given":"P","family":"Ruch","sequence":"first","affiliation":[]},{"given":"R","family":"Baud","sequence":"additional","affiliation":[]},{"given":"A","family":"Geissb\u00fchler","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/S1386-5056(02)00057-6_BIB1","unstructured":"A. Aizawa, Linguistic techniques to improve the performance of automatic text categorization, Proceedings of the Sixth Natural Language Processing Pacific Rim Symposium (NLPRS2001), 2001, pp. 307\u2013314."},{"key":"10.1016\/S1386-5056(02)00057-6_BIB2","doi-asserted-by":"crossref","unstructured":"D. Carmel, D. Cohen, R. Fagin, E. Farchi, M. Herscovici, Y. Maarek, A. Soffer, Static index pruning for information retrieval systems, Proceedings ACM-SIGIR, 2001, pp. 43\u201350.","DOI":"10.1145\/383952.383958"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB3","series-title":"Automatic SNOMED Classification\u2014A Corpus Based Method, Yearbook of Medical Informatics","author":"de Bruijn","year":"1999"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB4","unstructured":"P. Franz, A. Zaiss, S. Schulz, U. Hahn, R. Klar, Automated coding of diagnoses: three methods compared, AMIA Symposium Proceedings, 2000."},{"issue":"Suppl. 1","key":"10.1016\/S1386-5056(02)00057-6_BIB5","doi-asserted-by":"crossref","first-page":"74","DOI":"10.1093\/bioinformatics\/17.suppl_1.S74","article-title":"Genies: a natural-language processing system for the extraction of molecular pathways from journal articles","volume":"17","author":"Friedman","year":"2001","journal-title":"Bioinformatics"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB6","first-page":"338","article-title":"Creating knowledge repositories from biomedical reports: the medsyndikate text mining system","volume":"7","author":"Hahn","year":"2002","journal-title":"Pacific Symposium on Biocomputing"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB7","first-page":"384","article-title":"TextQuest: document clustering of MedLine abstracts for concept discovery in molecular biology","volume":"6","author":"Iliopoulos","year":"2001","journal-title":"Pacific Symposium on Biocomputing"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB8","doi-asserted-by":"crossref","unstructured":"T. Joachims, Making large-scale support vector machine learning practical, in B. Scholkopf, C. Burges, A. Surola. Advances in Kernel Methods: Support Vector machines, MIT Press, Cambridge, MA, December 1998.","DOI":"10.7551\/mitpress\/1130.003.0015"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB9","series-title":"Speech and Language Processing","author":"Jurafeky","year":"2000"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB10","doi-asserted-by":"crossref","unstructured":"P. Kantor, E. Voorhees, The TREC-5 confusion track: comparing retrieval methods for scanned text, Information Retrieval (2000) 165\u2013176.","DOI":"10.1023\/A:1009902609570"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB11","doi-asserted-by":"crossref","unstructured":"J. Klavans, S. Muresan, Evaluation of DEFINDER: a system to mine definitions from consumer-oriented medical text, JCDL'01, 2002.","DOI":"10.1145\/379437.379488"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB12","doi-asserted-by":"crossref","first-page":"377","DOI":"10.1145\/146370.146380","article-title":"Techniques for automatically correcting words in text","volume":"24","author":"Kukich","year":"1992","journal-title":"ACM Computer Survey"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB13","doi-asserted-by":"crossref","unstructured":"E. Mittendorf, P. Schauble, Measuring the effects of data corruption on information retrieval, SDAIR Proceedings, 1996.","DOI":"10.1007\/978-1-4615-6163-7"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB14","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1145\/359038.359041","article-title":"Computer programs for detecting and correcting spelling errors","volume":"23","author":"Peterson","year":"1980","journal-title":"Communication ACM"},{"issue":"3","key":"10.1016\/S1386-5056(02)00057-6_BIB15","doi-asserted-by":"crossref","first-page":"129","DOI":"10.1002\/asi.4630270302","article-title":"Relevance weighting of search terms","volume":"27","author":"Robertson","year":"1976","journal-title":"Journal of American Society for Information Science"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB16","first-page":"249","article-title":"Using part-of-speech and word-sense disambiguation for boosting string-edit, distance spelling correction","volume":"2101","author":"Ruch","year":"2001","journal-title":"Lecture Notes in Artificial Intelligence"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB17","unstructured":"P. Ruch, R. Baud, R. Rassinoux, P. Bouillon, G. Robert, Medical document anonymisation with a semantic lexicon, Journal of American Medical Information Association (Symposium Suppl.) (2000) 729\u2013733."},{"key":"10.1016\/S1386-5056(02)00057-6_BIB18","doi-asserted-by":"crossref","unstructured":"P. Ruch, A. Gaudinat, Comparing corpora and lexical disambiguation, ACL Workshop on Comparing Corpora Proceedings, 2001.","DOI":"10.3115\/1117729.1117732"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB19","series-title":"The SMART Retrieval System\u2014Experiment in Automatic Document Retrieval","author":"Salton","year":"1971"},{"issue":"5","key":"10.1016\/S1386-5056(02)00057-6_BIB20","doi-asserted-by":"crossref","first-page":"513","DOI":"10.1016\/0306-4573(88)90021-0","article-title":"Term-weighting approaches in automatic text retrieval","volume":"24","author":"Salton","year":"1988","journal-title":"Information Processing and Management"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB21","series-title":"Introduction to Modern Information Retrieval","author":"Salton","year":"1983"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB22","first-page":"317","article-title":"Genes themes and microarrays: using information retrieval for large-scale gene analysis","volume":"8","author":"Shatkay","year":"2000","journal-title":"Proceddings of International Conference Intellectual System and Molecular Biology"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB23","unstructured":"A. Singhal, G. Salton, C. Buckley, Length normalization in degraded text collections. Technical Report TR95-1507, NEC, 1995."},{"key":"10.1016\/S1386-5056(02)00057-6_BIB24","doi-asserted-by":"crossref","unstructured":"K. Taghva, J. Borsack, A. Condit, Results to applying probablistic IR to OCR text, ACM-SIGIR, 1994 pp. 202\u2013211.","DOI":"10.1007\/978-1-4471-2099-5_21"},{"key":"10.1016\/S1386-5056(02)00057-6_BIB25","unstructured":"Y. Yang, X. Liu, A re-examination of text categorization methods. ACM SIGIR1, 1998, pp. 42\u201349."},{"key":"10.1016\/S1386-5056(02)00057-6_BIB26","unstructured":"Q. Zeng, Patient and clinician vocabulary: how different are they, Medlnfo'2001 Proceedings, 2001."}],"container-title":["International Journal of Medical Informatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1386505602000576?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1386505602000576?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,4,25]],"date-time":"2023-04-25T14:26:50Z","timestamp":1682432810000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1386505602000576"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2002,12]]},"references-count":26,"journal-issue":{"issue":"1-3","published-print":{"date-parts":[[2002,12]]}},"alternative-id":["S1386505602000576"],"URL":"https:\/\/doi.org\/10.1016\/s1386-5056(02)00057-6","relation":{},"ISSN":["1386-5056"],"issn-type":[{"value":"1386-5056","type":"print"}],"subject":[],"published":{"date-parts":[[2002,12]]}}}