{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T04:53:03Z","timestamp":1774500783814,"version":"3.50.1"},"reference-count":67,"publisher":"Oxford University Press (OUP)","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Database"],"published-print":{"date-parts":[[2017]]},"DOI":"10.1093\/database\/baw163","type":"journal-article","created":{"date-parts":[[2016,11,24]],"date-time":"2016-11-24T07:06:50Z","timestamp":1479971210000},"page":"baw163","source":"Crossref","is-referenced-by-count":42,"title":["Duplicates, redundancies and inconsistencies in the primary nucleotide databases: a descriptive study"],"prefix":"10.1093","volume":"2017","author":[{"given":"Qingyu","family":"Chen","sequence":"first","affiliation":[]},{"given":"Justin","family":"Zobel","sequence":"additional","affiliation":[]},{"given":"Karin","family":"Verspoor","sequence":"additional","affiliation":[]}],"member":"286","published-online":{"date-parts":[[2017,1,10]]},"reference":[{"key":"2017011117351194000_2017.0.baw163.1","doi-asserted-by":"crossref","first-page":"96","DOI":"10.1109\/MC.2007.331","article-title":"The current state of business intelligence","volume":"40","author":"Watson","year":"2007","journal-title":"Computer"},{"key":"2017011117351194000_2017.0.baw163.2","doi-asserted-by":"publisher","DOI":"10.1016\/0895-4356(94)90010-8"},{"key":"2017011117351194000_2017.0.baw163.3","doi-asserted-by":"crossref","unstructured":"Tintle N.L. Gordon D. McMahon F.J. Finch S.J. (2007) Using duplicate genotyped data in genetic analyses: testing association and estimating error rates. Stat. Appl. Genet. Mol. Biol., 6, Article 4.","DOI":"10.2202\/1544-6115.1251"},{"key":"2017011117351194000_2017.0.baw163.4","doi-asserted-by":"crossref","unstructured":"Fan W. (2012), Web-Age Information Management. Springer, Berlin, pp. 1\u201316.","DOI":"10.1007\/978-3-642-32281-5_1"},{"key":"2017011117351194000_2017.0.baw163.5","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gks1084"},{"key":"2017011117351194000_2017.0.baw163.6","doi-asserted-by":"publisher","DOI":"10.1016\/0168-9525(96)60040-7"},{"key":"2017011117351194000_2017.0.baw163.7","unstructured":"M\u00fcller H. Naumann F. Freytag J. (2003) Data quality in genome databases. Eighth International Conference on Information Quality (IQ 2003). MIT Press, Cambridge, MA."},{"key":"2017011117351194000_2017.0.baw163.8","doi-asserted-by":"publisher","DOI":"10.1089\/cmb.2007.R005"},{"key":"2017011117351194000_2017.0.baw163.9","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/12.1.1"},{"key":"2017011117351194000_2017.0.baw163.10","first-page":"11","article-title":"Detecting redundancy in biological databases? An efficient approach","volume":"9","author":"Chellamuthu","year":"2009","journal-title":"Global J. Comput. Sci. Technol"},{"key":"2017011117351194000_2017.0.baw163.11","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/14.5.423"},{"key":"2017011117351194000_2017.0.baw163.12","first-page":"S22388.","article-title":"Duplicate detection in biological data using association rule mining","volume":"501","author":"Koh","year":"2004","journal-title":"Locus"},{"key":"2017011117351194000_2017.0.baw163.13","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/24.2.316"},{"key":"2017011117351194000_2017.0.baw163.14","doi-asserted-by":"publisher","DOI":"10.1093\/protein\/15.8.643"},{"key":"2017011117351194000_2017.0.baw163.15","doi-asserted-by":"crossref","first-page":"395","DOI":"10.1504\/IJDMB.2010.034196","article-title":"Detecting duplicate biological entities using shortest path edit distance","volume":"4","author":"Rudniy","year":"2010","journal-title":"Int. J. Data Mining Bioinformatics"},{"key":"2017011117351194000_2017.0.baw163.16","doi-asserted-by":"publisher","DOI":"10.6026\/97320630005234"},{"key":"2017011117351194000_2017.0.baw163.17","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1007\/s10115-009-0254-7","article-title":"Detecting duplicate biological entities using Markov random field-based edit distance","volume":"25","author":"Song","year":"2010","journal-title":"Knowl. Information Syst"},{"key":"2017011117351194000_2017.0.baw163.18","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btm098"},{"key":"2017011117351194000_2017.0.baw163.19","doi-asserted-by":"crossref","first-page":"5084.","DOI":"10.1103\/PhysRevE.51.5084","article-title":"Long-range correlation properties of coding and noncoding DNA sequences: GenBank analysis","volume":"51","author":"Buldyrev","year":"1995","journal-title":"Phys. Rev. E"},{"key":"2017011117351194000_2017.0.baw163.20","doi-asserted-by":"crossref","first-page":"102","DOI":"10.21273\/JASHS.130.1.102","article-title":"Strawberry GenBank-derived and genomic simple sequence repeat (SSR) markers and their utility with strawberry, blackberry, and red and black raspberry","volume":"130","author":"Lewers","year":"2005","journal-title":"J. Am. Soc. Horticult. Sci"},{"key":"2017011117351194000_2017.0.baw163.21","doi-asserted-by":"publisher","DOI":"10.1016\/S0168-9525(99)01706-0"},{"key":"2017011117351194000_2017.0.baw163.22","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1016\/j.ympev.2012.09.002","article-title":"Molecular phylogeny of North American Branchiobdellida (Annelida: Clitellata)","volume":"66","author":"Williams","year":"2013","journal-title":"Mol. Phylogenet. Evol"},{"key":"2017011117351194000_2017.0.baw163.23","doi-asserted-by":"publisher","DOI":"10.1016\/S0168-9525(01)02348-4"},{"key":"2017011117351194000_2017.0.baw163.24","doi-asserted-by":"publisher","DOI":"10.1038\/ng0294-119"},{"key":"2017011117351194000_2017.0.baw163.25","doi-asserted-by":"publisher","DOI":"10.1093\/database\/bat035"},{"key":"2017011117351194000_2017.0.baw163.26","doi-asserted-by":"crossref","unstructured":"Bastian F. Parmentier G. Roux J. . (2008), Data Integration in the Life Sciences. Springer, Berlin, pp. 124\u2013131.","DOI":"10.1007\/978-3-540-69828-9_12"},{"key":"2017011117351194000_2017.0.baw163.27","doi-asserted-by":"publisher","DOI":"10.1093\/database\/bat060"},{"key":"2017011117351194000_2017.0.baw163.28","doi-asserted-by":"crossref","first-page":"D279","DOI":"10.1093\/nar\/gkv1344","article-title":"The Pfam protein families database: towards a more sustainable future","volume":"44","author":"Finn","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2017011117351194000_2017.0.baw163.29","doi-asserted-by":"publisher","DOI":"10.1093\/database\/baq037"},{"key":"2017011117351194000_2017.0.baw163.30","doi-asserted-by":"publisher","DOI":"10.1093\/database\/bav091"},{"key":"2017011117351194000_2017.0.baw163.31","doi-asserted-by":"publisher","DOI":"10.1093\/database\/baq039"},{"key":"2017011117351194000_2017.0.baw163.32","doi-asserted-by":"publisher","DOI":"10.1093\/database\/baq020"},{"key":"2017011117351194000_2017.0.baw163.33","doi-asserted-by":"crossref","first-page":"bar023.","DOI":"10.1093\/database\/bar023","article-title":"The modENCODE Data Coordination Center: lessons in harvesting comprehensive experimental details","volume":"2011","author":"Washington","year":"2011","journal-title":"Database"},{"key":"2017011117351194000_2017.0.baw163.34","doi-asserted-by":"publisher","DOI":"10.1093\/database\/bat015"},{"key":"2017011117351194000_2017.0.baw163.35","doi-asserted-by":"publisher","DOI":"10.1093\/database\/bav112"},{"key":"2017011117351194000_2017.0.baw163.36","doi-asserted-by":"publisher","DOI":"10.1093\/database\/bap015"},{"key":"2017011117351194000_2017.0.baw163.37","doi-asserted-by":"crossref","first-page":"1467","DOI":"10.1016\/j.eswa.2012.08.045","article-title":"Detecting near-duplicate documents using sentence-level features and supervised learning","volume":"40","author":"Lin","year":"2013","journal-title":"Expert Syst. Appl"},{"key":"2017011117351194000_2017.0.baw163.38","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bts565"},{"key":"2017011117351194000_2017.0.baw163.39","doi-asserted-by":"crossref","first-page":"D36","DOI":"10.1093\/nar\/gks1195","article-title":"GenBank","volume":"41","author":"Benson","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"2017011117351194000_2017.0.baw163.40","doi-asserted-by":"crossref","unstructured":"Zorita E.V. Cusc\u00f3 P. Filion G. (2015) Starcode: sequence clustering based on all-pairs search. Bioinformatics, btv053.","DOI":"10.1093\/bioinformatics\/btv053"},{"key":"2017011117351194000_2017.0.baw163.41","doi-asserted-by":"crossref","first-page":"28","DOI":"10.1007\/s00778-002-0072-y","article-title":"A Bayesian decision model for cost optimal record matching","volume":"12","author":"Verykios","year":"2003","journal-title":"VLDB J"},{"key":"2017011117351194000_2017.0.baw163.42","doi-asserted-by":"publisher","DOI":"10.1136\/bmjqs-2012-001419"},{"key":"2017011117351194000_2017.0.baw163.43","doi-asserted-by":"crossref","unstructured":"Christen P. Goiser K. (2007) Quality Measures in Data Mining. Springer, Berlin, pp. 127\u2013151.","DOI":"10.1007\/978-3-540-44918-8_6"},{"key":"2017011117351194000_2017.0.baw163.44","doi-asserted-by":"crossref","unstructured":"Martins B. (2011) GeoSpatial Semantics. Springer, Berlin, pp. 34\u201351.","DOI":"10.1007\/978-3-642-20630-6_3"},{"key":"2017011117351194000_2017.0.baw163.45","first-page":"721","article-title":"AMIA Annual Symposium Proceedings","volume":"2013","author":"Joffe","year":"2013","journal-title":"American Medical Informatics Association"},{"key":"2017011117351194000_2017.0.baw163.46","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-15-187"},{"key":"2017011117351194000_2017.0.baw163.47","unstructured":"Koh J.L. (2007), Correlation-Based Methods for Biological Data Cleaning, PhD thesis, National university of Singapore."},{"key":"2017011117351194000_2017.0.baw163.48","unstructured":"UniProt Consortium. and others. (2014) UniProt: a hub for protein information. Nucleic Acids Res., 43:D204\u2013D212."},{"key":"2017011117351194000_2017.0.baw163.49","doi-asserted-by":"publisher","DOI":"10.1186\/gb-2002-3-2-reviews0003"},{"key":"2017011117351194000_2017.0.baw163.50","doi-asserted-by":"crossref","first-page":"D733","DOI":"10.1093\/nar\/gkv1189","article-title":"Reference sequence (RefSeq) database at NCBI: current status, taxonomic expansion, and functional annotation","volume":"44","author":"O'Leary","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2017011117351194000_2017.0.baw163.51","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkv1226"},{"key":"2017011117351194000_2017.0.baw163.52","doi-asserted-by":"crossref","unstructured":"Chen Q. Jobel J. Verspoor K. (2016) Benchmarks for Measurement of Duplicate Detection Methods in Nucleotide Databases. Database, doi: http:\/\/dx.doi.org\/10.1101\/085324.","DOI":"10.1101\/085324"},{"key":"2017011117351194000_2017.0.baw163.53","doi-asserted-by":"crossref","unstructured":"Chen Q. Zobel J. Verspoor K. (2015) Evaluation of a Machine Learning Duplicate Detection Method for Bioinformatics Databases. ACM Ninth International Workshop on Data and Text Mining in Biomedical Informatics in conjunction with CIKM, October 19\u201323, 2015, Melbourne, VIC, Australia. ACM Press, New York.","DOI":"10.1145\/2811163.2811175"},{"key":"2017011117351194000_2017.0.baw163.54","doi-asserted-by":"publisher","DOI":"10.1093\/oxfordjournals.molbev.a003886"},{"key":"2017011117351194000_2017.0.baw163.55","doi-asserted-by":"crossref","first-page":"1956","DOI":"10.1093\/clinchem\/47.11.1956","article-title":"Oligonucleotide melting temperatures under PCR conditions: nearest-neighbor corrections for Mg2+, deoxynucleotide triphosphate, and dimethyl sulfoxide concentrations with comparison to alternative empirical formulas","volume":"47","author":"Ahsen","year":"2001","journal-title":"Clin. Chem"},{"key":"2017011117351194000_2017.0.baw163.56","doi-asserted-by":"crossref","first-page":"695","DOI":"10.1128\/AEM.59.3.695-700.1993","article-title":"Profiling of complex microbial populations by denaturing gradient gel electrophoresis analysis of polymerase chain reaction-amplified genes coding for 16S rRNA","volume":"59","author":"Muyzer","year":"1993","journal-title":"Appl. Environ. Microbiol"},{"key":"2017011117351194000_2017.0.baw163.57","doi-asserted-by":"publisher","DOI":"10.1046\/j.1462-2920.2002.00362.x"},{"key":"2017011117351194000_2017.0.baw163.58","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gks001"},{"key":"2017011117351194000_2017.0.baw163.59","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.85.2400"},{"key":"2017011117351194000_2017.0.baw163.60","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pgen.1004941"},{"key":"2017011117351194000_2017.0.baw163.61","first-page":"e26204","article-title":"Analysis of simple and imperfect microsatellites in Ebolavirus species and other genomes of Filoviridae family","volume":"2","author":"Mashhood","year":"2015","journal-title":"Gene Cell Tissue"},{"key":"2017011117351194000_2017.0.baw163.62","doi-asserted-by":"publisher","DOI":"10.1021\/ja0025806"},{"key":"2017011117351194000_2017.0.baw163.63","doi-asserted-by":"publisher","DOI":"10.1111\/nph.12790"},{"key":"2017011117351194000_2017.0.baw163.64","doi-asserted-by":"publisher","DOI":"10.1093\/database\/bau016"},{"key":"2017011117351194000_2017.0.baw163.65","unstructured":"Tavallaee M. Bagheri E. Lu W. Ghorbani A.A. (2009) Proceedings of the Second IEEE Symposium on Computational Intelligence for Security and Defence Applications 2009."},{"key":"2017011117351194000_2017.0.baw163.66","unstructured":"Bilenko M. Mooney R.J. (2003) Proceedings of the KDD-2003 Workshop on Data Cleaning, Record Linkage, and Object Consolidation, Washington, DC, pp. 7\u201312."},{"key":"2017011117351194000_2017.0.baw163.67","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2007.250581"}],"container-title":["Database"],"original-title":[],"language":"en","deposited":{"date-parts":[[2020,9,27]],"date-time":"2020-09-27T09:45:04Z","timestamp":1601199904000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/database\/article-lookup\/doi\/10.1093\/database\/baw163"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"references-count":67,"alternative-id":["10.1093\/database\/baw163"],"URL":"https:\/\/doi.org\/10.1093\/database\/baw163","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/085019","asserted-by":"object"}]},"ISSN":["1758-0463"],"issn-type":[{"value":"1758-0463","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017]]}}}