{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T19:27:16Z","timestamp":1772393236623,"version":"3.50.1"},"reference-count":39,"publisher":"Oxford University Press (OUP)","issue":"3","license":[{"start":{"date-parts":[[2017,12,1]],"date-time":"2017-12-01T00:00:00Z","timestamp":1512086400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["R01AI130460"],"award-info":[{"award-number":["R01AI130460"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["R01GM103859"],"award-info":[{"award-number":["R01GM103859"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["U24CA194215"],"award-info":[{"award-number":["U24CA194215"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["U01AG006781"],"award-info":[{"award-number":["U01AG006781"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["ES013508"],"award-info":[{"award-number":["ES013508"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["DK112217"],"award-info":[{"award-number":["DK112217"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["TR001878"],"award-info":[{"award-number":["TR001878"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,3,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Objectives<\/jats:title>\n                  <jats:p>This study proposes a novelPrior knowledge guidedIntegrated likelihoodEstimation (PIE) method to correct bias in estimations of associations due to misclassification of electronic health record (EHR)-derived binary phenotypes, and evaluates the performance of the proposed method by comparing it to 2 methods in common practice.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Methods<\/jats:title>\n                  <jats:p>We conducted simulation studies and data analysis of real EHR-derived data on diabetes from Kaiser Permanente Washington to compare the estimation bias of associations using the proposed method, the method ignoring phenotyping errors, the maximum likelihood method with misspecified sensitivity and specificity, and the maximum likelihood method with correctly specified sensitivity and specificity (gold standard). The proposed method effectively leverages available information on phenotyping accuracy to construct a prior distribution for sensitivity and specificity, and incorporates this prior information through the integrated likelihood for bias reduction.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>Our simulation studies and real data application demonstrated that the proposed method effectively reduces the estimation bias compared to the 2 current methods. It performed almost as well as the gold standard method when the prior had highest density around true sensitivity and specificity. The analysis of EHR data from Kaiser Permanente Washington showed that the estimated associations from PIE were very close to the estimates from the gold standard method and reduced bias by 60%\u2013100% compared to the 2 commonly used methods in current practice for EHR data.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Conclusions<\/jats:title>\n                  <jats:p>This study demonstrates that the proposed method can effectively reduce estimation bias caused by imperfect phenotyping in EHR-derived data by incorporating prior information through integrated likelihood.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/jamia\/ocx137","type":"journal-article","created":{"date-parts":[[2017,11,15]],"date-time":"2017-11-15T12:42:33Z","timestamp":1510749753000},"page":"345-352","source":"Crossref","is-referenced-by-count":19,"title":["PIE: A prior knowledge guided integrated likelihood estimation method for bias reduction in association studies using electronic health records data"],"prefix":"10.1093","volume":"25","author":[{"given":"Jing","family":"Huang","sequence":"first","affiliation":[{"name":"Perelman School of Medicine, University of Pennsylvania, Philadelphia, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rui","family":"Duan","sequence":"additional","affiliation":[{"name":"Perelman School of Medicine, University of Pennsylvania, Philadelphia, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rebecca A","family":"Hubbard","sequence":"additional","affiliation":[{"name":"Perelman School of Medicine, University of Pennsylvania, Philadelphia, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yonghui","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Biomedical Informatics, University of Texas Health Science Center at Houston, Houston, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jason H","family":"Moore","sequence":"additional","affiliation":[{"name":"Perelman School of Medicine, University of Pennsylvania, Philadelphia, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hua","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Biomedical Informatics, University of Texas Health Science Center at Houston, Houston, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yong","family":"Chen","sequence":"additional","affiliation":[{"name":"Perelman School of Medicine, University of Pennsylvania, Philadelphia, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2017,12,1]]},"reference":[{"issue":"4","key":"2020110612380757300_ocx137-B1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1016\/j.ajhg.2011.09.008","article-title":"Variants near FOXE1 are associated with hypothyroidism and other thyroid conditions: using electronic medical records for genome- and phenome-wide studies","volume":"89","author":"Denny","year":"2011","journal-title":"Am J Human Genet."},{"issue":"20","key":"2020110612380757300_ocx137-B2","doi-asserted-by":"crossref","first-page":"2016","DOI":"10.1161\/CIRCULATIONAHA.110.948828","article-title":"Identification of genomic predictors of atrioventricular conduction using electronic medical records as a tool for genome science","volume":"122","author":"Denny","year":"2010","journal-title":"Circulation."},{"issue":"79","key":"2020110612380757300_ocx137-B3","doi-asserted-by":"crossref","first-page":"79re1","DOI":"10.1126\/scitranslmed.3001807","article-title":"Electronic medical records for genetic research: results of the eMERGE consortium","volume":"3","author":"Kho","year":"2011","journal-title":"Sci Trans Med."},{"issue":"3","key":"2020110612380757300_ocx137-B4","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/1746-5354-6-3-50","article-title":"Community engagement in biobanking: experiences from the eMERGE Network","volume":"6","author":"Lemke","year":"2010","journal-title":"Genomics, Soc Policy."},{"issue":"4","key":"2020110612380757300_ocx137-B5","doi-asserted-by":"crossref","first-page":"560","DOI":"10.1016\/j.ajhg.2010.03.003","article-title":"Robust replication of genotype-phenotype associations across multiple diseases in an electronic medical record","volume":"86","author":"Ritchie","year":"2010","journal-title":"Am J Human Genet."},{"issue":"1","key":"2020110612380757300_ocx137-B6","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1002\/1097-0258(20010115)20:1<139::AID-SIM644>3.0.CO;2-K","article-title":"Efficient regression calibration for logistic regression in main study\/internal validation study designs with an imperfect reference instrument","volume":"20","author":"Spiegelman","year":"2001","journal-title":"Stat Med."},{"issue":"4","key":"2020110612380757300_ocx137-B7","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1016\/j.jclinepi.2004.10.012","article-title":"A review of uses of health care utilization databases for epidemiologic research on therapeutics","volume":"58","author":"Schneeweiss","year":"2005","journal-title":"J Clin Epidemiol."},{"issue":"1","key":"2020110612380757300_ocx137-B8","doi-asserted-by":"crossref","first-page":"1203","DOI":"10.13063\/2327-9214.1203","article-title":"A general framework for considering selection bias in EHR-based studies: what data are observed and why?","volume":"4","author":"Haneuse","year":"2016","journal-title":"eGEMs."},{"issue":"e1","key":"2020110612380757300_ocx137-B9","doi-asserted-by":"crossref","first-page":"e20","DOI":"10.1093\/jamia\/ocv130","article-title":"Combining billing codes, clinical notes, and medications from electronic health records provides superior phenotyping performance","volume":"23","author":"Wei","year":"2016","journal-title":"J Am Med Inform Assoc."},{"issue":"9","key":"2020110612380757300_ocx137-B10","doi-asserted-by":"crossref","first-page":"1205","DOI":"10.1093\/bioinformatics\/btq126","article-title":"PheWAS: demonstrating the feasibility of a phenome-wide scan to discover gene-disease associations","volume":"26","author":"Denny","year":"2010","journal-title":"Bioinformatics."},{"issue":"2015","key":"2020110612380757300_ocx137-B11","first-page":"147","article-title":"A modular architecture for electronic health record\u2013driven phenotyping","author":"Rasmussen","year":"2015","journal-title":"AMIA Summits Transl Sci Proc."},{"issue":"2","key":"2020110612380757300_ocx137-B12","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1093\/oxfordjournals.aje.a009251","article-title":"Logistic regression when the outcome is measured with uncertainty","volume":"146","author":"Magder","year":"1997","journal-title":"Am J Epidemiol."},{"issue":"2016","key":"2020110612380757300_ocx137-B13","first-page":"1764","article-title":"An empirical study for impacts of measurement errors on ehr based association studies","author":"Duan","year":"2017","journal-title":"AMIA Annu Symp Proc."},{"key":"2020110612380757300_ocx137-B14","doi-asserted-by":"crossref","DOI":"10.1201\/9781420010138","volume-title":"Measurement Error in Nonlinear Models: A Modern Perspective","author":"Carroll","year":"2006"},{"key":"2020110612380757300_ocx137-B15","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1111\/j.2517-6161.1988.tb01723.x","article-title":"Binary regression models for contaminated data","volume":"50","author":"Copas","year":"1988","journal-title":"J Royal Stats Soc. Series B (Methodological)."},{"issue":"6","key":"2020110612380757300_ocx137-B16","doi-asserted-by":"crossref","first-page":"540","DOI":"10.1056\/NEJMoa1215740","article-title":"Glucose levels and risk of dementia","volume":"369","author":"Crane","year":"2013","journal-title":"New Engl J Med."},{"issue":"14","key":"2020110612380757300_ocx137-B17","doi-asserted-by":"crossref","first-page":"2221","DOI":"10.1002\/sim.2094","article-title":"Does it always help to adjust for misclassification of a binary outcome in logistic regression?","volume":"24","author":"Luan","year":"2005","journal-title":"Stats Med."},{"issue":"2","key":"2020110612380757300_ocx137-B18","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1136\/amiajnl-2011-000597","article-title":"Impact of data fragmentation across healthcare centers on the accuracy of a high-throughput clinical phenotyping algorithm for specifying subjects with type 2 diabetes mellitus","volume":"19","author":"Wei","year":"2012","journal-title":"J Am Med Inform Assoc"},{"issue":"2","key":"2020110612380757300_ocx137-B19","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1016\/j.amjcard.2015.10.031","article-title":"A simple and portable algorithm for identifying atrial fibrillation in the electronic medical record","volume":"117","author":"Khurshid","year":"2016","journal-title":"Am J Cardiol."},{"issue":"4","key":"2020110612380757300_ocx137-B20","doi-asserted-by":"crossref","first-page":"947","DOI":"10.1111\/j.0006-341X.2003.00109.x","article-title":"Discrete proportional hazards models for mismeasured outcomes","volume":"59","author":"Meier","year":"2003","journal-title":"Biometrics."},{"issue":"3","key":"2020110612380757300_ocx137-B21","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1093\/biomet\/asm040","article-title":"Integrated likelihood functions for non-Bayesian inference","volume":"94","author":"Severini","year":"2007","journal-title":"Biometrika."},{"issue":"4","key":"2020110612380757300_ocx137-B22","doi-asserted-by":"crossref","first-page":"843","DOI":"10.1093\/biomet\/86.4.843","article-title":"Bias and efficiency loss due to misclassified responses in binary regression","volume":"86","author":"Neuhaus","year":"1999","journal-title":"Biometrika."},{"issue":"1","key":"2020110612380757300_ocx137-B23","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1214\/ss\/1009211803","article-title":"Integrated likelihood methods for eliminating nuisance parameters","volume":"14","author":"Berger","year":"1999","journal-title":"Stats Sci."},{"issue":"11","key":"2020110612380757300_ocx137-B24","doi-asserted-by":"crossref","first-page":"1737","DOI":"10.1001\/archneur.59.11.1737","article-title":"Dementia and Alzheimer disease incidence: a prospective cohort study","volume":"59","author":"Kukull","year":"2002","journal-title":"Arch Neurol."},{"key":"2020110612380757300_ocx137-B25","doi-asserted-by":"crossref","first-page":"b81","DOI":"10.1136\/bmj.b81","article-title":"Use of primary care electronic medical record database in drug efficacy research on cardiovascular outcomes: comparison of database and randomised controlled trial findings","volume":"338","author":"Tannen","year":"2009","journal-title":"BMJ."},{"issue":"4","key":"2020110612380757300_ocx137-B26","doi-asserted-by":"crossref","first-page":"e33224","DOI":"10.1371\/journal.pone.0033224","article-title":"The co-morbidity burden of children and young adults with autism spectrum disorders","volume":"7","author":"Kohane","year":"2012","journal-title":"PLoS One."},{"issue":"7","key":"2020110612380757300_ocx137-B27","doi-asserted-by":"crossref","first-page":"e2626","DOI":"10.1371\/journal.pone.0002626","article-title":"Automated identification of acute hepatitis B using electronic medical record data to facilitate public health surveillance","volume":"3","author":"Klompas","year":"2008","journal-title":"PLoS One."},{"issue":"1","key":"2020110612380757300_ocx137-B28","doi-asserted-by":"crossref","first-page":"40","DOI":"10.2215\/CJN.04230510","article-title":"Development and validation of an electronic health record\u2013based chronic kidney disease registry","volume":"6","author":"Navaneethan","year":"2011","journal-title":"Clin J Am Soc Nephrol."},{"issue":"e1","key":"2020110612380757300_ocx137-B29","doi-asserted-by":"crossref","first-page":"e162","DOI":"10.1136\/amiajnl-2011-000583","article-title":"Portability of an algorithm to identify rheumatoid arthritis in electronic health records","volume":"19","author":"Carroll","year":"2012","journal-title":"J Am Med Inform Assoc."},{"issue":"8","key":"2020110612380757300_ocx137-B30","doi-asserted-by":"crossref","first-page":"1120","DOI":"10.1002\/acr.20184","article-title":"Electronic medical records for discovery research in rheumatoid arthritis","volume":"62","author":"Liao","year":"2010","journal-title":"Arthritis Care Res."},{"key":"2020110612380757300_ocx137-B31","doi-asserted-by":"crossref","first-page":"S30","DOI":"10.1097\/MLR.0b013e318259c011","article-title":"Diabetes and asthma case identification, validation, and representativeness when using electronic health data to construct registries for comparative effectiveness and epidemiologic research","volume":"50","author":"Desai","year":"2012","journal-title":"Med Care."},{"issue":"4","key":"2020110612380757300_ocx137-B32","doi-asserted-by":"crossref","first-page":"604","DOI":"10.1136\/amiajnl-2011-000557","article-title":"Validity of electronic health record\u2013derived quality measurement for performance monitoring","volume":"19","author":"Parsons","year":"2012","journal-title":"J Am Med Inform Assoc."},{"key":"2020110612380757300_ocx137-B33","doi-asserted-by":"crossref","first-page":"441","DOI":"10.1177\/1062860611403136","article-title":"How good are the data? Feasible approach to validation of metrics of quality derived from an outpatient electronic health record","volume":"26","author":"Benin","year":"2011","journal-title":"Am J Med Qual."},{"issue":"e1","key":"2020110612380757300_ocx137-B34","doi-asserted-by":"crossref","first-page":"e147","DOI":"10.1136\/amiajnl-2012-000896","article-title":"Validation of electronic medical record-based phenotyping algorithms: results and lessons learned from the eMERGE Network","volume":"20","author":"Newton","year":"2013","journal-title":"J Am Med Inform Assoc."},{"issue":"4","key":"2020110612380757300_ocx137-B35","doi-asserted-by":"crossref","first-page":"363","DOI":"10.1176\/appi.ajp.2014.14030423","article-title":"Validation of electronic health record phenotyping of bipolar disorder cases and controls","volume":"172","author":"Castro","year":"2015","journal-title":"Am J Psychiatry."},{"issue":"2","key":"2020110612380757300_ocx137-B36","doi-asserted-by":"crossref","first-page":"212","DOI":"10.1136\/amiajnl-2011-000439","article-title":"Use of diverse electronic medical record systems to identify genetic risk for type 2 diabetes within a genome-wide association study","volume":"19","author":"Kho","year":"2012","journal-title":"J Am Med Inform Assoc."},{"issue":"1","key":"2020110612380757300_ocx137-B37","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1002\/nav.3800040113","article-title":"A quadratic programming procedure","volume":"4","author":"Hildreth","year":"1957","journal-title":"Naval Res Logistics."},{"issue":"3","key":"2020110612380757300_ocx137-B38","doi-asserted-by":"crossref","first-page":"588","DOI":"10.1137\/0111043","article-title":"Minimizing certain convex functions","volume":"11","author":"Warga","year":"1963","journal-title":"J Soc Indust Appl Math."},{"key":"2020110612380757300_ocx137-B39","volume-title":"Iterative Solution of Nonlinear Equations in Several Variables. Vol. 30","author":"Ortega","year":"1970"}],"container-title":["Journal of the American Medical Informatics Association"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/jamia\/article-pdf\/25\/3\/345\/34150309\/ocx137.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"http:\/\/academic.oup.com\/jamia\/article-pdf\/25\/3\/345\/34150309\/ocx137.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,6]],"date-time":"2020-11-06T17:56:35Z","timestamp":1604685395000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/jamia\/article\/25\/3\/345\/4683155"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12,1]]},"references-count":39,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2017,12,1]]},"published-print":{"date-parts":[[2018,3,1]]}},"URL":"https:\/\/doi.org\/10.1093\/jamia\/ocx137","relation":{},"ISSN":["1067-5027","1527-974X"],"issn-type":[{"value":"1067-5027","type":"print"},{"value":"1527-974X","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2018,3]]},"published":{"date-parts":[[2017,12,1]]}}}