{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T20:25:42Z","timestamp":1748291142069,"version":"3.37.3"},"reference-count":19,"publisher":"Oxford University Press (OUP)","issue":"16","license":[{"start":{"date-parts":[[2020,11,27]],"date-time":"2020-11-27T00:00:00Z","timestamp":1606435200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000026","name":"National Institute on Drug Abuse","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000026","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01DA046619"],"award-info":[{"award-number":["R01DA046619"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,8,25]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>LexExp is an open-source, data-centric lexicon expansion system that generates spelling variants of lexical expressions in a lexicon using a phrase embedding model, lexical similarity-based natural language processing methods and a set of tunable threshold decay functions. The system is customizable, can be optimized for recall or precision and can generate variants for multi-word expressions.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>Code available at: https:\/\/bitbucket.org\/asarker\/lexexp; data and resources available at: https:\/\/sarkerlab.org\/lexexp.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaa995","type":"journal-article","created":{"date-parts":[[2020,11,17]],"date-time":"2020-11-17T20:12:39Z","timestamp":1605643959000},"page":"2499-2501","source":"Crossref","is-referenced-by-count":14,"title":["LexExp: a system for automatically expanding concept lexicons for noisy biomedical texts"],"prefix":"10.1093","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7358-544X","authenticated-orcid":false,"given":"Abeed","family":"Sarker","sequence":"first","affiliation":[{"name":"Department of Biomedical Informatics, School of Medicine, Emory University , Atlanta, GA 30322, USA"}]}],"member":"286","published-online":{"date-parts":[[2020,11,27]]},"reference":[{"key":"2023051609053551500_btaa995-B1","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1136\/jamia.2009.002733","article-title":"An overview of MetaMap: historical perspective and recent advances","volume":"17","author":"Aronson","year":"2010","journal-title":"J. Am. Med. Inform. Assoc"},{"key":"2023051609053551500_btaa995-B2","first-page":"224","article-title":"Aspiring to unintended consequences of natural language processing: a review of recent developments in clinical and consumer-generated text processing","volume":"25","author":"Demner-Fushman","year":"2016","journal-title":"IMIA Yearbook"},{"key":"2023051609053551500_btaa995-B3","first-page":"843","volume-title":"A Threshold Method of Approximate String Matching","author":"Fischer","year":"1982"},{"key":"2023051609053551500_btaa995-B4","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1016\/j.eswa.2018.04.006","article-title":"A domain transferable lexicon set for Twitter sentiment analysis using a supervised machine learning approach","volume":"106","author":"Ghiassi","year":"2018","journal-title":"Expert Syst. Appl"},{"key":"2023051609053551500_btaa995-B5","first-page":"184","article-title":"UMLS\u00ae knowledge for biomedical language processing","volume":"81","author":"McCray","year":"1993","journal-title":"Bull. Med. Libr. Assoc"},{"key":"2023051609053551500_btaa995-B6","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"Mikolov","year":"2013"},{"key":"2023051609053551500_btaa995-B7","doi-asserted-by":"crossref","first-page":"679","DOI":"10.1093\/jamia\/ocx152","article-title":"Expanding a radiology lexicon using contextual patterns in radiology reports","volume":"25","author":"Percha","year":"2018","journal-title":"J. Am. Med. Inform. Assoc"},{"key":"2023051609053551500_btaa995-B8","doi-asserted-by":"crossref","first-page":"28","DOI":"10.1186\/2041-1480-4-28","article-title":"Evaluating gold standard corpora against gene\/protein tagging solutions and lexical resources","volume":"4","author":"Rebholz-Schuhmann","year":"2013","journal-title":"J. Biomed. Sem"},{"key":"2023051609053551500_btaa995-B9","doi-asserted-by":"crossref","first-page":"1310","DOI":"10.1093\/jamia\/ocaa116","article-title":"Self-reported COVID-19 symptoms on Twitter: an analysis and a research resource","volume":"27","author":"Sarker","year":"2020","journal-title":"J. Am. Med. Inform. Assoc"},{"key":"2023051609053551500_btaa995-B10","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1016\/j.jbi.2014.11.002","article-title":"Portable automatic text classification for adverse drug reaction detection via multi-corpus training","volume":"53","author":"Sarker","year":"2015","journal-title":"J. Biomed. Inform"},{"key":"2023051609053551500_btaa995-B11","doi-asserted-by":"crossref","first-page":"122","DOI":"10.1016\/j.dib.2016.11.056","article-title":"A corpus for mining drug-related knowledge from Twitter chatter: language models and their utilities","volume":"10","author":"Sarker","year":"2017","journal-title":"Data Brief"},{"key":"2023051609053551500_btaa995-B12","doi-asserted-by":"crossref","first-page":"98","DOI":"10.1016\/j.jbi.2018.11.007","article-title":"An unsupervised and customizable misspelling generator for mining noisy health-related text sources","volume":"88","author":"Sarker","year":"2018","journal-title":"J. Biomed. Inform"},{"key":"2023051609053551500_btaa995-B13","first-page":"251","volume-title":"Typographical Nearest-Neighbor Search in a Finite-State Lexicon and its Application to Spelling Correction. Lecture Notes in Computer Science. Artificial Intelligence and Lecture Notes in Bioinformatics","author":"Savary","year":"2002"},{"key":"2023051609053551500_btaa995-B14","doi-asserted-by":"crossref","first-page":"507","DOI":"10.1136\/jamia.2009.001560","article-title":"Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications","volume":"17","author":"Savova","year":"2010","journal-title":"J. Am. Med. Inform. Assoc"},{"key":"2023051609053551500_btaa995-B15","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1136\/amiajnl-2013-001935","article-title":"A review of approaches to identifying patient phenotype cohorts using electronic health records","volume":"21","author":"Shivade","year":"2014","journal-title":"J. Am. Med. Inform. Assoc"},{"key":"2023051609053551500_btaa995-B16","doi-asserted-by":"crossref","first-page":"S11","DOI":"10.1186\/1471-2105-13-S14-S11","article-title":"Matching health information seekers\u2019 queries to medical terms","volume":"13","author":"Soualmia","year":"2012","journal-title":"BMC Bioinform"},{"key":"2023051609053551500_btaa995-B17","first-page":"298","volume-title":"Generating Positive Psychosis Symptom Keywords from Electronic Health Records. Lecture Notes in Computer Science. Artificial Intelligence and Lecture Notes in Bioinformatics","author":"Viani","year":"2019"},{"key":"2023051609053551500_btaa995-B18","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1197\/jamia.M1761","article-title":"Exploring and developing consumer health vocabularies","volume":"13","author":"Zeng","year":"2006","journal-title":"J. Am. Med. Inform. Assoc"},{"key":"2023051609053551500_btaa995-B19","doi-asserted-by":"crossref","first-page":"e27","DOI":"10.2196\/medinform.4211","article-title":"Context-sensitive spelling correction of consumer-generated content on health care","volume":"3","author":"Zhou","year":"2015","journal-title":"JMIR Med. Inform"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaa995\/36253010\/btaa995.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/16\/2499\/50339134\/btaa995.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/16\/2499\/50339134\/btaa995.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T09:11:47Z","timestamp":1684228307000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/37\/16\/2499\/6007257"}},"subtitle":[],"editor":[{"given":"Wren","family":"Jonathan","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,11,27]]},"references-count":19,"journal-issue":{"issue":"16","published-print":{"date-parts":[[2021,8,25]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaa995","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"type":"print","value":"1367-4803"},{"type":"electronic","value":"1367-4811"}],"subject":[],"published-other":{"date-parts":[[2021,8,15]]},"published":{"date-parts":[[2020,11,27]]}}}