{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,29]],"date-time":"2026-03-29T09:22:05Z","timestamp":1774776125811,"version":"3.50.1"},"reference-count":19,"publisher":"Oxford University Press (OUP)","issue":"21","license":[{"start":{"date-parts":[[2020,7,29]],"date-time":"2020-07-29T00:00:00Z","timestamp":1595980800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"US National Institutes of Health","doi-asserted-by":"crossref","award":["K99 HG010909"],"award-info":[{"award-number":["K99 HG010909"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000002","name":"US National Institutes of Health","doi-asserted-by":"crossref","award":["R01 HG010749"],"award-info":[{"award-number":["R01 HG010749"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"crossref"}]},{"name":"AL Williams Professorship"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,29]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Functional genomics data are becoming clinically actionable, raising privacy concerns. However, quantifying privacy leakage via genotyping is difficult due to the heterogeneous nature of sequencing techniques. Thus, we present FANCY, a tool that rapidly estimates the number of leaking variants from raw RNA-Seq, ATAC-Seq and ChIP-Seq reads, without explicit genotyping. FANCY employs supervised regression using overall sequencing statistics as features and provides an estimate of the overall privacy risk before data release.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>FANCY can predict the cumulative number of leaking SNVs with an average 0.95\u2009R2 for all independent test sets. We realize the importance of accurate prediction when the number of leaked variants is low. Thus, we develop a special version of the model, which can make predictions with higher accuracy when the number of leaking variants is low.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>A python and MATLAB implementation of FANCY, as well as custom scripts to generate the features can be found at https:\/\/github.com\/gersteinlab\/FANCY. We also provide jupyter notebooks so that users can optimize the parameters in the regression model based on their own data. An easy-to-use webserver that takes inputs and displays results can be found at fancy.gersteinlab.org.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaa661","type":"journal-article","created":{"date-parts":[[2020,7,22]],"date-time":"2020-07-22T15:24:53Z","timestamp":1595431493000},"page":"5145-5150","source":"Crossref","is-referenced-by-count":4,"title":["FANCY: fast estimation of privacy risk in functional genomics data"],"prefix":"10.1093","volume":"36","author":[{"given":"Gamze","family":"G\u00fcrsoy","sequence":"first","affiliation":[{"name":"Computational Biology and Bioinformatics , New Haven, CT 06520, USA"},{"name":"Molecular Biophysics and Biochemistry , New Haven, CT 06520, USA"}]},{"given":"Charlotte M","family":"Brannon","sequence":"additional","affiliation":[{"name":"Computational Biology and Bioinformatics , New Haven, CT 06520, USA"},{"name":"Molecular Biophysics and Biochemistry , New Haven, CT 06520, USA"}]},{"given":"Fabio C P","family":"Navarro","sequence":"additional","affiliation":[{"name":"Computational Biology and Bioinformatics , New Haven, CT 06520, USA"},{"name":"Molecular Biophysics and Biochemistry , New Haven, CT 06520, USA"}]},{"given":"Mark","family":"Gerstein","sequence":"additional","affiliation":[{"name":"Computational Biology and Bioinformatics , New Haven, CT 06520, USA"},{"name":"Molecular Biophysics and Biochemistry , New Haven, CT 06520, USA"},{"name":"Computer Science, Yale University , New Haven, CT 06520, USA"}]}],"member":"286","published-online":{"date-parts":[[2020,7,29]]},"reference":[{"key":"2023062408072540100_btaa661-B1","first-page":"10.1","article-title":"From FastQ data to high-confidence variant calls: the Genome Analysis Toolkit best practices pipeline","volume":"43","author":"Auwera","year":"2013","journal-title":"Curr. Protoc. Bioinformatics"},{"key":"2023062408072540100_btaa661-B2","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1038\/ng.806","article-title":"A framework for variation discovery and genotyping using next-generation DNA sequencing data","volume":"43","author":"DePristo","year":"2011","journal-title":"Nat. Genet"},{"key":"2023062408072540100_btaa661-B3","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1093\/bioinformatics\/bts635","article-title":"STAR: ultrafast universal RNA-seq aligner","volume":"29","author":"Dobin","year":"2013","journal-title":"Bioinformatics"},{"key":"2023062408072540100_btaa661-B4","doi-asserted-by":"publisher","DOI":"10.1101\/345074","article-title":"Private information leakage from functional genomics data: quantification with calibration experiments and reduction via data sanitization protocols","author":"Gursoy","year":"2019","journal-title":"Biorxiv"},{"key":"2023062408072540100_btaa661-B5","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1038\/nmeth.3746","article-title":"Quantification of private information leakage from phenotype-genotype data: linking attacks","volume":"13","author":"Harmanci","year":"2016","journal-title":"Nat. Methods"},{"key":"2023062408072540100_btaa661-B6","doi-asserted-by":"crossref","first-page":"2453","DOI":"10.1038\/s41467-018-04875-5","article-title":"Analysis of sensitive information leakage in functional genomics signal profiles through genomic deletions","volume":"9","author":"Harmanci","year":"2018","journal-title":"Nat. Commun"},{"key":"2023062408072540100_btaa661-B7","doi-asserted-by":"crossref","first-page":"744","DOI":"10.1126\/science.1242463","article-title":"Coordinated effects of sequence variation on DNA binding, chromatin structure, and transcription","volume":"342","author":"Kilpinen","year":"2013","journal-title":"Science"},{"key":"2023062408072540100_btaa661-B8","doi-asserted-by":"crossref","first-page":"231","DOI":"10.1016\/0888-7543(88)90007-9","article-title":"Genomic mapping by fingerprinting random clones: a mathematical analysis","volume":"2","author":"Lander","year":"1988","journal-title":"Genomics"},{"key":"2023062408072540100_btaa661-B9","doi-asserted-by":"crossref","first-page":"506","DOI":"10.1038\/nature12531","article-title":"Transcriptome and genome sequencing uncovers functional variation in humans","volume":"501","author":"Lappalainen","year":"2013","journal-title":"Nature"},{"key":"2023062408072540100_btaa661-B10","doi-asserted-by":"crossref","first-page":"1754","DOI":"10.1093\/bioinformatics\/btp324","article-title":"Fast and accurate short read alignment with Burrows-Wheeler Transform","volume":"25","author":"Li","year":"2009","journal-title":"Bioinformatics"},{"key":"2023062408072540100_btaa661-B11","doi-asserted-by":"crossref","first-page":"2078","DOI":"10.1093\/bioinformatics\/btp352","article-title":"The Sequence Alignment\/Map format and SAMtools","volume":"25","author":"Li","year":"2009","journal-title":"Bioinformatics"},{"key":"2023062408072540100_btaa661-B12","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1126\/science.1095019","article-title":"Genomic research and human subject privacy","volume":"305","author":"Lin","year":"2004","journal-title":"Science"},{"key":"2023062408072540100_btaa661-B13","doi-asserted-by":"crossref","first-page":"1181","DOI":"10.1038\/ng1007-1181","article-title":"The NCBI dbGaP database of genotypes and phenotypes","volume":"39","author":"Mailman","year":"2007","journal-title":"Nat. Genet"},{"key":"2023062408072540100_btaa661-B15","year":"2018"},{"key":"2023062408072540100_btaa661-B16","volume-title":"Gaussian Processes for Machine Learning","author":"Rasmussen","year":"2006"},{"key":"2023062408072540100_btaa661-B17","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1186\/gb-2011-12-8-125","article-title":"The real cost of sequencing: higher than you think","volume":"12","author":"Sboner","year":"2011","journal-title":"Genome Biol"},{"key":"2023062408072540100_btaa661-B18","first-page":"603","article-title":"Bayesian method to predict individual SNP genotypes from gene expression data","volume":"44","author":"Schadt","year":"2012","journal-title":"Nature"},{"key":"2023062408072540100_btaa661-B19","doi-asserted-by":"crossref","first-page":"1061","DOI":"10.1038\/nature09534","article-title":"A map of human genome variation from population-scale sequencing","volume":"467","year":"2010","journal-title":"Nature"},{"key":"2023062408072540100_btaa661-B20","doi-asserted-by":"crossref","first-page":"eaat8464","DOI":"10.1126\/science.aat8464","article-title":"Comprehensive functional genomic resource and integrative model for the human brain","volume":"362","author":"Wang","year":"2018","journal-title":"Science"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaa661\/33830610\/btaa661.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/21\/5145\/50692888\/btaa661.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/21\/5145\/50692888\/btaa661.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,24]],"date-time":"2023-06-24T19:33:05Z","timestamp":1687635185000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/36\/21\/5145\/5877942"}},"subtitle":[],"editor":[{"given":"Peter","family":"Robinson","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,7,29]]},"references-count":19,"journal-issue":{"issue":"21","published-print":{"date-parts":[[2021,1,29]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaa661","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/775338","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2020,11,1]]},"published":{"date-parts":[[2020,7,29]]}}}