{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T19:32:58Z","timestamp":1774985578827,"version":"3.50.1"},"update-to":[{"DOI":"10.1371\/journal.pcbi.1007894","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2020,6,22]],"date-time":"2020-06-22T00:00:00Z","timestamp":1592784000000}}],"reference-count":60,"publisher":"Public Library of Science (PLoS)","issue":"5","license":[{"start":{"date-parts":[[2020,5,26]],"date-time":"2020-05-26T00:00:00Z","timestamp":1590451200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Medical Research Council","award":["MC UU 1201412"],"award-info":[{"award-number":["MC UU 1201412"]}]}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"DOI":"10.1371\/journal.pcbi.1007894","type":"journal-article","created":{"date-parts":[[2020,5,26]],"date-time":"2020-05-26T17:39:14Z","timestamp":1590514754000},"page":"e1007894","update-policy":"https:\/\/doi.org\/10.1371\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":59,"title":["Predicting host taxonomic information from viral genomes: A comparison of feature representations"],"prefix":"10.1371","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5236-1145","authenticated-orcid":true,"given":"Francesca","family":"Young","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3578-4477","authenticated-orcid":true,"given":"Simon","family":"Rogers","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6338-0221","authenticated-orcid":true,"given":"David L.","family":"Robertson","sequence":"additional","affiliation":[]}],"member":"340","published-online":{"date-parts":[[2020,5,26]]},"reference":[{"key":"pcbi.1007894.ref001","doi-asserted-by":"crossref","first-page":"804","DOI":"10.1038\/nature06244","article-title":"The human microbiome project: exploring the microbial part of ourselves in a changing world","volume":"449","author":"PJ Turnbaugh","year":"2007","journal-title":"Nature"},{"key":"pcbi.1007894.ref002","doi-asserted-by":"crossref","first-page":"1034","DOI":"10.1126\/science.1153213","article-title":"The Microbial Engines That Drive Earth\u2019s Biogeochemical Cycles","volume":"320","author":"PG Falkowski","year":"2008","journal-title":"Science"},{"key":"pcbi.1007894.ref003","doi-asserted-by":"crossref","first-page":"801","DOI":"10.1038\/nrmicro1750","article-title":"Marine viruses\u2014major players in the global ecosystem","volume":"5","author":"CA Suttle","year":"2007","journal-title":"Nat Rev Microbiol"},{"key":"pcbi.1007894.ref004","doi-asserted-by":"crossref","first-page":"e00111","DOI":"10.1128\/mSystems.00111-19","article-title":"A Viral Ecogenomics Framework To Uncover the Secrets of Nature\u2019s \u201cMicrobe Whisperers.\u201d","volume":"4","author":"S. Roux","year":"2019","journal-title":"mSystems"},{"key":"pcbi.1007894.ref005","article-title":"Viral dark matter and virus\u2013host interactions resolved from publicly available microbial genomes","volume":"4","author":"S Roux","journal-title":"eLife"},{"key":"pcbi.1007894.ref006","doi-asserted-by":"crossref","first-page":"D678","DOI":"10.1093\/nar\/gky1127","article-title":"IMG\/VR v.2.0: an integrated data management and analysis system for cultivated and environmental viral genomes","volume":"47","author":"D Paez-Espino","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"pcbi.1007894.ref007","doi-asserted-by":"crossref","first-page":"258","DOI":"10.1093\/femsre\/fuv048","article-title":"Computational approaches to predict bacteriophage\u2013host relationships","volume":"40","author":"RA Edwards","year":"2016","journal-title":"FEMS Microbiol Rev"},{"key":"pcbi.1007894.ref008","doi-asserted-by":"crossref","first-page":"4498","DOI":"10.1038\/ncomms5498","article-title":"A highly abundant bacteriophage discovered in the unknown sequences of human faecal metagenomes","volume":"5","author":"BE Dutilh","year":"2014","journal-title":"Nat Commun"},{"key":"pcbi.1007894.ref009","doi-asserted-by":"crossref","first-page":"3113","DOI":"10.1093\/bioinformatics\/btx383","article-title":"WIsH: who is the host? Predicting prokaryotic hosts from metagenomic phage contigs","volume":"33","author":"C Galiez","year":"2017","journal-title":"Bioinformatics"},{"key":"pcbi.1007894.ref010","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1093\/nar\/gkw1002","article-title":"Alignment-free d2* oligonucleotide frequency dissimilarity measure improves prediction of hosts from metagenomically-derived viral sequences","volume":"45","author":"NA Ahlgren","year":"2017","journal-title":"Nucleic Acids Res"},{"key":"pcbi.1007894.ref011","doi-asserted-by":"crossref","first-page":"116","DOI":"10.3390\/v8050116","article-title":"HostPhinder: A Phage Host Prediction Tool","volume":"8","author":"J Villarroel","year":"2016","journal-title":"Viruses"},{"key":"pcbi.1007894.ref012","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1038\/nrg3920","article-title":"Machine learning applications in genetics and genomics","volume":"16","author":"MW Libbrecht","year":"2015","journal-title":"Nat Rev Genet"},{"key":"pcbi.1007894.ref013","volume-title":"Information Science and Statistics","author":"CM Bishop","year":"2006"},{"key":"pcbi.1007894.ref014","doi-asserted-by":"crossref","DOI":"10.3390\/v8030066","article-title":"Linking Virus Genomes with Host Taxonomy","volume":"8","author":"T Mihara","year":"2016","journal-title":"Viruses"},{"key":"pcbi.1007894.ref015","doi-asserted-by":"crossref","first-page":"4527","DOI":"10.1093\/nar\/gku075","article-title":"The influence of CpG and UpA dinucleotide frequencies on RNA virus replication and characterization of the innate cellular pathways underlying virus attenuation and enhanced replication","volume":"42","author":"NJ Atkinson","year":"2014","journal-title":"Nucleic Acids Res"},{"key":"pcbi.1007894.ref016","first-page":"nature24140","article-title":"Evolution: Zapping viral RNAs","volume":"550","author":"SP Goff","year":"2017","journal-title":"Nature"},{"key":"pcbi.1007894.ref017","doi-asserted-by":"crossref","first-page":"610","DOI":"10.1186\/1471-2164-14-610","article-title":"Modelling mutational and selection pressures on dinucleotides in eukaryotic phyla\u2013selection against CpG and UpA in cytoplasmically expressed RNA and in RNA viruses","volume":"14","author":"P Simmonds","year":"2013","journal-title":"BMC Genomics"},{"key":"pcbi.1007894.ref018","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1007\/s00239-008-9068-6","article-title":"Codon Bias is a Major Factor Explaining Phage Evolution in Translationally Biased Hosts","volume":"66","author":"A. Carbone","year":"2008","journal-title":"J Mol Evol"},{"key":"pcbi.1007894.ref019","doi-asserted-by":"crossref","first-page":"17155","DOI":"10.1038\/srep17155","article-title":"Inferring the hosts of coronavirus using dual statistical models based on nucleotide composition","volume":"5","author":"Q Tang","year":"2015","journal-title":"Sci Rep"},{"key":"pcbi.1007894.ref020","doi-asserted-by":"crossref","first-page":"10032","DOI":"10.1038\/s41598-018-28308-x","article-title":"Comparative studies of alignment, alignment-free and SVM based approaches for predicting the hosts of viruses based on viral sequences","volume":"8","author":"H Li","year":"2018","journal-title":"Sci Rep"},{"key":"pcbi.1007894.ref021","doi-asserted-by":"crossref","first-page":"10322","DOI":"10.1128\/JVI.00601-10","article-title":"Use of Nucleotide Composition Analysis To Infer Hosts for Three Novel Picorna-Like Viruses","volume":"84","author":"A Kapoor","year":"2010","journal-title":"J Virol"},{"key":"pcbi.1007894.ref022","doi-asserted-by":"crossref","first-page":"577","DOI":"10.1126\/science.aap9072","article-title":"Predicting reservoir hosts and arthropod vectors from evolutionary signatures in RNA virus genomes","volume":"362","author":"SA Babayan","year":"2018","journal-title":"Science"},{"key":"pcbi.1007894.ref023","doi-asserted-by":"crossref","first-page":"3436","DOI":"10.1038\/s41598-019-39847-2","article-title":"Host Taxon Predictor\u2014A Tool for Predicting Taxon of the Host of a Newly Discovered Virus","volume":"9","author":"W Ga\u0142an","year":"2019","journal-title":"Sci Rep"},{"key":"pcbi.1007894.ref024","doi-asserted-by":"crossref","DOI":"10.1186\/s12859-017-1473-7","article-title":"Prediction of virus-host infectious association by supervised learning methods","volume":"18","author":"M Zhang","year":"2017","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1007894.ref025","article-title":"Identifying Hosts of Families of Viruses: A Machine Learning Approach","volume":"6","author":"A Raj","year":"2011","journal-title":"PLoS ONE"},{"key":"pcbi.1007894.ref026","article-title":"Computational prediction of inter-species relationships through omics data analysis and machine learning","volume":"19","author":"DMC Leite","year":"2018","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1007894.ref027","doi-asserted-by":"crossref","first-page":"e1000079","DOI":"10.1371\/journal.ppat.1000079","article-title":"Patterns of Evolution and Host Gene Mimicry in Influenza and Other RNA Viruses","volume":"4","author":"BD Greenbaum","year":"2008","journal-title":"PLOS Pathog"},{"key":"pcbi.1007894.ref028","article-title":"Virus-Host Coevolution: Common Patterns of Nucleotide Motif Usage in Flaviviridae and Their Hosts","volume":"4","author":"FP Lobo","year":"2009","journal-title":"PLoS ONE"},{"key":"pcbi.1007894.ref029","doi-asserted-by":"crossref","first-page":"8","DOI":"10.1186\/1471-2164-7-8","article-title":"Evidence of host-virus co-evolution in tetranucleotide usage patterns of bacteriophages and eukaryotic viruses","volume":"7","author":"DT Pride","year":"2006","journal-title":"BMC Genomics"},{"key":"pcbi.1007894.ref030","doi-asserted-by":"crossref","first-page":"458","DOI":"10.1016\/j.tim.2004.08.005","article-title":"The evolution of large DNA viruses: combining genomic information of viruses and their hosts","volume":"12","author":"LA Shackelton","year":"2004","journal-title":"Trends Microbiol"},{"key":"pcbi.1007894.ref031","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1016\/j.tibs.2010.10.002","article-title":"How viruses hijack cell regulation","volume":"36","author":"NE Davey","year":"2011","journal-title":"Trends Biochem Sci"},{"key":"pcbi.1007894.ref032","doi-asserted-by":"crossref","first-page":"501","DOI":"10.1016\/j.tim.2011.07.003","article-title":"Virus\u2013host interactomes and global models of virus-infected cells","volume":"19","author":"CC Friedel","year":"2011","journal-title":"Trends Microbiol"},{"key":"pcbi.1007894.ref033","doi-asserted-by":"crossref","DOI":"10.3389\/fmicb.2017.01557","article-title":"Protein\u2013Protein Interactions in Virus\u2013Host Systems","volume":"8","author":"AF Brito","year":"2017","journal-title":"Front Microbiol"},{"key":"pcbi.1007894.ref034","doi-asserted-by":"crossref","first-page":"10538","DOI":"10.1073\/pnas.1101440108","article-title":"Structural principles within the human-virus protein-protein interaction network","volume":"108","author":"EA Franzosa","year":"2011","journal-title":"Proc Natl Acad Sci"},{"key":"pcbi.1007894.ref035","doi-asserted-by":"crossref","DOI":"10.1186\/s12859-017-1570-7","article-title":"Prediction of virus-host protein-protein interactions mediated by short linear motifs","volume":"18","author":"A Becerra","year":"2017","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1007894.ref036","doi-asserted-by":"crossref","first-page":"e1005579","DOI":"10.1371\/journal.pcbi.1005579","article-title":"Structural host-microbiota interaction networks","volume":"13","author":"E Guven-Maiorov","year":"2017","journal-title":"PLOS Comput Biol"},{"key":"pcbi.1007894.ref037","first-page":"nrmicro2222","article-title":"The evolutionary conundrum of pathogen mimicry","volume":"7","author":"NC Elde","year":"2009","journal-title":"Nat Rev Microbiol"},{"key":"pcbi.1007894.ref038","doi-asserted-by":"crossref","first-page":"528","DOI":"10.1080\/10635150290069940","article-title":"Preferential host switching by primate lentiviruses can account for phylogenetic similarity with the primate phylogeny","volume":"51","author":"MA Charleston","year":"2002","journal-title":"Syst Biol"},{"key":"pcbi.1007894.ref039","doi-asserted-by":"crossref","first-page":"676","DOI":"10.1126\/science.1188836","article-title":"Host Phylogeny Constrains Cross-Species Emergence and Establishment of Rabies Virus in Bats","volume":"329","author":"DG Streicker","year":"2010","journal-title":"Science"},{"key":"pcbi.1007894.ref040","article-title":"Viral Phylogenomics Using an Alignment-Free Method: A Three-Step Approach to Determine Optimal Length of k-mer","volume":"7","author":"Q Zhang","year":"2017","journal-title":"Sci Rep"},{"key":"pcbi.1007894.ref041","article-title":"Identification and characterization of Coronaviridae genomes from Vietnamese bats and rats based on conserved protein domains","volume":"4","author":"MVT Phan","year":"2018","journal-title":"Virus Evol."},{"key":"pcbi.1007894.ref042","doi-asserted-by":"crossref","first-page":"3396","DOI":"10.1093\/bioinformatics\/btx440","article-title":"VICTOR: genome-based phylogeny and classification of prokaryotic viruses","volume":"33","author":"JP Meier-Kolthoff","year":"2017","journal-title":"Bioinformatics"},{"key":"pcbi.1007894.ref043","doi-asserted-by":"crossref","DOI":"10.1186\/s40168-018-0422-7","article-title":"The genomic underpinnings of eukaryotic virus taxonomy: creating a sequence-based framework for family-level virus classification","volume":"6","author":"P Aiewsakun","year":"2018","journal-title":"Microbiome"},{"key":"pcbi.1007894.ref044","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1038\/s41467-018-07641-9","article-title":"High throughput ANI analysis of 90K prokaryotic genomes reveals clear species boundaries","volume":"9","author":"C Jain","year":"2018","journal-title":"Nat Commun"},{"key":"pcbi.1007894.ref045","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1038\/nbt.4306","article-title":"Minimum Information about an Uncultivated Virus Genome (MIUViG)","volume":"37","author":"S Roux","year":"2019","journal-title":"Nat Biotechnol"},{"key":"pcbi.1007894.ref046","doi-asserted-by":"crossref","first-page":"1464","DOI":"10.1093\/bioinformatics\/bti204","article-title":"Convergent evolution of domain architectures (is rare)","volume":"21","author":"J. Gough","year":"2005","journal-title":"Bioinforma Oxf Engl"},{"key":"pcbi.1007894.ref047","first-page":"27","article-title":"Learning the Kernel Matrix with Semidefinite Programming","volume":"5","author":"GRG Lanckriet","year":"2004","journal-title":"Journal of MachineLearningResearch"},{"key":"pcbi.1007894.ref048","doi-asserted-by":"crossref","first-page":"677","DOI":"10.1146\/annurev-genet-110711-155522","article-title":"Rules of Engagement: Molecular Insights from Host-Virus Arms Races","volume":"46","author":"MD Daugherty","year":"2012","journal-title":"Annu Rev Genet"},{"key":"pcbi.1007894.ref049","first-page":"2014","article-title":"The Domain Landscape of Virus-Host Interactomes","author":"L-L Zheng","year":"2014","journal-title":"BioMed Res Int"},{"key":"pcbi.1007894.ref050","doi-asserted-by":"crossref","first-page":"3241","DOI":"10.1093\/bioinformatics\/bty351","article-title":"Universal evolutionary selection for high dimensional silent patterns of information hidden in the redundancy of viral genetic code","volume":"34","author":"E Goz","year":"2018","journal-title":"Bioinformatics"},{"key":"pcbi.1007894.ref051","doi-asserted-by":"crossref","DOI":"10.1128\/JVI.02381-16","article-title":"Dinucleotide Composition in Animal RNA Viruses Is Shaped More by Virus Family than by Host Species","volume":"91","author":"F Di Giallonardo","year":"2017","journal-title":"J Virol"},{"key":"pcbi.1007894.ref052","doi-asserted-by":"crossref","first-page":"e06416","DOI":"10.7554\/eLife.06416","article-title":"Whole genome comparison of a large collection of mycobacteriophages reveals a continuum of phage genetic diversity","volume":"4","author":"WH Pope","year":"2015","journal-title":"eLife"},{"key":"pcbi.1007894.ref053","doi-asserted-by":"crossref","first-page":"D700","DOI":"10.1093\/nar\/gkx1124","article-title":"MVP: a microbe\u2013phage interaction database","volume":"46","author":"NL Gao","year":"2018","journal-title":"Nucleic Acids Res"},{"key":"pcbi.1007894.ref054","doi-asserted-by":"crossref","first-page":"D571","DOI":"10.1093\/nar\/gku1207","article-title":"NCBI Viral Genomes Resource","volume":"43","author":"JR Brister","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"pcbi.1007894.ref055","doi-asserted-by":"crossref","first-page":"1635","DOI":"10.1093\/molbev\/msw046","article-title":"ETE 3: Reconstruction, Analysis, and Visualization of Phylogenomic Data","volume":"33","author":"J Huerta-Cepas","year":"2016","journal-title":"Mol Biol Evol"},{"key":"pcbi.1007894.ref056","doi-asserted-by":"crossref","first-page":"4337","DOI":"10.1073\/pnas.0607879104","article-title":"Predicting protein\u2013protein interactions based only on sequences information","volume":"104","author":"J Shen","year":"2007","journal-title":"Proc Natl Acad Sci U S A"},{"key":"pcbi.1007894.ref057","doi-asserted-by":"crossref","first-page":"e1002195","DOI":"10.1371\/journal.pcbi.1002195","article-title":"Accelerated Profile HMM Searches","volume":"7","author":"SR Eddy","year":"2011","journal-title":"PLOS Comput Biol"},{"key":"pcbi.1007894.ref058","doi-asserted-by":"crossref","first-page":"W29","DOI":"10.1093\/nar\/gkr367","article-title":"HMMER web server: interactive sequence similarity searching","volume":"39","author":"RD Finn","year":"2011","journal-title":"Nucleic Acids Res"},{"key":"pcbi.1007894.ref059","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1007\/BF00994018","article-title":"Support-vector networks","volume":"20","author":"C Cortes","year":"1995","journal-title":"Mach Learn"},{"key":"pcbi.1007894.ref060","first-page":"2825","article-title":"Scikit-learn: Machine Learning in Python","volume":"12","author":"F Pedregosa","year":"2011","journal-title":"Journal of Machine Learning Research"}],"updated-by":[{"DOI":"10.1371\/journal.pcbi.1007894","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2020,6,22]],"date-time":"2020-06-22T00:00:00Z","timestamp":1592784000000}}],"container-title":["PLOS Computational Biology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1007894","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,6,22]],"date-time":"2020-06-22T17:45:18Z","timestamp":1592847918000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1007894"}},"subtitle":[],"editor":[{"given":"Morgan","family":"Langille","sequence":"first","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,5,26]]},"references-count":60,"journal-issue":{"issue":"5","published-online":{"date-parts":[[2020,5,26]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pcbi.1007894","relation":{"new_version":[{"id-type":"doi","id":"10.1371\/journal.pcbi.1007894","asserted-by":"object"}]},"ISSN":["1553-7358"],"issn-type":[{"value":"1553-7358","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,5,26]]}}}