{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T23:37:25Z","timestamp":1773445045063,"version":"3.50.1"},"reference-count":48,"publisher":"Oxford University Press (OUP)","issue":"18","license":[{"start":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T00:00:00Z","timestamp":1659484800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001700","name":"Ministry of Education, Culture, Sports, Science, and Technology of Japan","doi-asserted-by":"crossref","award":["21H03538"],"award-info":[{"award-number":["21H03538"]}],"id":[{"id":"10.13039\/501100001700","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001700","name":"Ministry of Education, Culture, Sports, Science, and Technology of Japan","doi-asserted-by":"crossref","award":["21K19495"],"award-info":[{"award-number":["21K19495"]}],"id":[{"id":"10.13039\/501100001700","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001700","name":"Ministry of Education, Culture, Sports, Science, and Technology of Japan","doi-asserted-by":"crossref","award":["22H00477"],"award-info":[{"award-number":["22H00477"]}],"id":[{"id":"10.13039\/501100001700","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001691","name":"Japan Society for the Promotion of Science","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]},{"name":"JSPS KAKENHI","award":["JP21K12104"],"award-info":[{"award-number":["JP21K12104"]}]},{"DOI":"10.13039\/100009619","name":"Japan Agency for Medical Research and Development","doi-asserted-by":"publisher","award":["21fk0108619h0001"],"award-info":[{"award-number":["21fk0108619h0001"]}],"id":[{"id":"10.13039\/100009619","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100009619","name":"Japan Agency for Medical Research and Development","doi-asserted-by":"publisher","award":["21ae0121048h0001"],"award-info":[{"award-number":["21ae0121048h0001"]}],"id":[{"id":"10.13039\/100009619","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100008732","name":"Uehara Memorial Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100008732","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,9,15]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Bacteriophages\/phages are the viruses that infect and replicate within bacteria and archaea, and rich in human body. To investigate the relationship between phages and microbial communities, the identification of phages from metagenome sequences is the first step. Currently, there are two main methods for identifying phages: database-based (alignment-based) methods and alignment-free methods. Database-based methods typically use a large number of sequences as references; alignment-free methods usually learn the features of the sequences with machine learning and deep learning models.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We propose INHERIT which uses a deep representation learning model to integrate both database-based and alignment-free methods, combining the strengths of both. Pre-training is used as an alternative way of acquiring knowledge representations from existing databases, while the BERT-style deep learning framework retains the advantage of alignment-free methods. We compare INHERIT with four existing methods on a third-party benchmark dataset. Our experiments show that INHERIT achieves a better performance with the F1-score of 0.9932. In addition, we find that pre-training two species separately helps the non-alignment deep learning model make more accurate predictions.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>The codes of INHERIT are now available in: https:\/\/github.com\/Celestial-Bai\/INHERIT.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btac509","type":"journal-article","created":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T10:39:47Z","timestamp":1659523187000},"page":"4264-4270","source":"Crossref","is-referenced-by-count":27,"title":["Identification of bacteriophage genome sequences with representation learning"],"prefix":"10.1093","volume":"38","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4921-0936","authenticated-orcid":false,"given":"Zeheng","family":"Bai","sequence":"first","affiliation":[{"name":"Division of Health Medical Intelligence, Human Genome Center, The Institute of Medical Science, The University of Tokyo , Minato-ku, Tokyo 108-8639, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yao-zhong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Division of Health Medical Intelligence, Human Genome Center, The Institute of Medical Science, The University of Tokyo , Minato-ku, Tokyo 108-8639, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Satoru","family":"Miyano","sequence":"additional","affiliation":[{"name":"Division of Health Medical Intelligence, Human Genome Center, The Institute of Medical Science, The University of Tokyo , Minato-ku, Tokyo 108-8639, Japan"},{"name":"M&D Data Science Center, Tokyo Medical and Dental University , Tokyo 113-8510, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rui","family":"Yamaguchi","sequence":"additional","affiliation":[{"name":"Division of Health Medical Intelligence, Human Genome Center, The Institute of Medical Science, The University of Tokyo , Minato-ku, Tokyo 108-8639, Japan"},{"name":"Division of Cancer Systems Biology, Aichi Cancer Center Research Institute , Nagoya 464-8681, Japan"},{"name":"Division of Cancer Informatics, Nagoya University Graduate School of Medicine , Nagoya 466-8560, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kosuke","family":"Fujimoto","sequence":"additional","affiliation":[{"name":"Division of Metagenome Medicine, Human Genome Center, The Institute of Medical Science, The University of Tokyo , Minato-ku, Tokyo 108-8639, Japan"},{"name":"Collaborative Research Institute for Innovative Microbiology, The University of Tokyo , Bunkyo-ku, Tokyo 113-8657, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Satoshi","family":"Uematsu","sequence":"additional","affiliation":[{"name":"Division of Metagenome Medicine, Human Genome Center, The Institute of Medical Science, The University of Tokyo , Minato-ku, Tokyo 108-8639, Japan"},{"name":"Collaborative Research Institute for Innovative Microbiology, The University of Tokyo , Bunkyo-ku, Tokyo 113-8657, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2989-308X","authenticated-orcid":false,"given":"Seiya","family":"Imoto","sequence":"additional","affiliation":[{"name":"Division of Health Medical Intelligence, Human Genome Center, The Institute of Medical Science, The University of Tokyo , Minato-ku, Tokyo 108-8639, Japan"},{"name":"Collaborative Research Institute for Innovative Microbiology, The University of Tokyo , Bunkyo-ku, Tokyo 113-8657, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2022,8,3]]},"reference":[{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"260","DOI":"10.1038\/nrmicro2319","article-title":"Antibiotic resistance and its cost: is it possible to reverse resistance?","volume":"8","author":"Andersson","year":"2010","journal-title":"Nat. Rev. Microbiol"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"e121","DOI":"10.1093\/nar\/gkaa856","article-title":"Seeker: alignment-free identification of bacteriophage genomes by deep learning","volume":"48","author":"Auslander","year":"2020","journal-title":"Nucleic Acids Res"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","article-title":"Representation learning: a review and new perspectives","volume":"35","author":"Bengio","year":"2013","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"689","DOI":"10.1002\/jctb.438","article-title":"Phages and their application against drug-resistant bacteria","volume":"76","author":"Chanishvili","year":"2001","journal-title":"J. Chem. Technol. Biotechnol"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"1009","DOI":"10.1093\/bib\/bbv099","article-title":"Multiple sequence alignment modeling: methods and applications","volume":"17","author":"Chatzou","year":"2016","journal-title":"Brief. Bioinform"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"417","DOI":"10.1128\/MMBR.00016-10","article-title":"Origins and evolution of antibiotic resistance","volume":"74","author":"Davies","year":"2010","journal-title":"Microbiol. Mol. Biol. Rev"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"837","DOI":"10.2307\/2531595","article-title":"Comparing the areas under two or more correlated receiver operating characteristic curves: a nonparametric approach","volume":"44","author":"DeLong","year":"1988","journal-title":"Biometrics"},{"key":"2023041408235147100_","author":"Devlin","year":"2018"},{"key":"2023041408235147100_","author":"Dodge","year":"2020"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"755","DOI":"10.1093\/bioinformatics\/14.9.755","article-title":"Profile hidden Markov models","volume":"14","author":"Eddy","year":"1998","journal-title":"Bioinformatics"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"368","DOI":"10.1016\/j.sbi.2006.04.004","article-title":"Multiple sequence alignment","volume":"16","author":"Edgar","year":"2006","journal-title":"Curr. Opin. Struct. Biol"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1038\/nrmicro1163","article-title":"Viral metagenomics","volume":"3","author":"Edwards","year":"2005","journal-title":"Nat. Rev. Microbiol"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"giz066","DOI":"10.1093\/gigascience\/giz066","article-title":"PPR-Meta: a tool for identifying phages and plasmids from metagenomic fragments using deep learning","volume":"8","author":"Fang","year":"2019","journal-title":"GigaScience"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1038\/21119","article-title":"Marine viruses and their biogeochemical and ecological effects","volume":"399","author":"Fuhrman","year":"1999","journal-title":"Nature"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"129","DOI":"10.3855\/jidc.3573","article-title":"Bacteriophage therapy: a potential solution for the antibiotic resistance crisis","volume":"8","author":"Golkar","year":"2014","journal-title":"J. Infect. Dev. Ctries"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s40168-020-00990-y","article-title":"VirSorter2: a multi-classifier, expert-guided approach to detect diverse DNA and RNA viruses","volume":"9","author":"Guo","year":"2021","journal-title":"Microbiome"},{"key":"2023041408235147100_","author":"Gururangan","year":"2020"},{"key":"2023041408235147100_","author":"Ho","year":"2021"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1186\/1471-2105-11-119","article-title":"Prodigal: prokaryotic gene recognition and translation initiation site identification","volume":"11","author":"Hyatt","year":"2010","journal-title":"BMC Bioinformatics"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"429","DOI":"10.3233\/IDA-2002-6504","article-title":"The class imbalance problem: a systematic study","volume":"6","author":"Japkowicz","year":"2002","journal-title":"IDA"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"2112","DOI":"10.1093\/bioinformatics\/btab083","article-title":"DNABERT: pre-trained bidirectional encoder representations from transformers model for DNA-language in genome","volume":"37","author":"Ji","year":"2021","journal-title":"Bioinformatics"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s40168-020-00867-0","article-title":"Vibrant: automated recovery, annotation and curation of microbial viruses, and evaluation of viral community function from genomic sequences","volume":"8","author":"Kieft","year":"2020","journal-title":"Microbiome"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1016\/j.chom.2019.01.014","article-title":"Phage therapy: a renewed approach to combat antibiotic-resistant bacteria","volume":"25","author":"Kortright","year":"2019","journal-title":"Cell Host Microbe"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"69","DOI":"10.2174\/138920110790725401","article-title":"Phage therapy in clinical practice: treatment of human infections","volume":"11","author":"Kutter","year":"2010","journal-title":"Curr. Pharm. Biotechnol"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"1057","DOI":"10.1016\/S1473-3099(13)70318-9","article-title":"Antibiotic resistance-the need for global solutions","volume":"13","author":"Laxminarayan","year":"2013","journal-title":"Lancet. Infect. Dis"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"2278","DOI":"10.1109\/5.726791","article-title":"Gradient-based learning applied to document recognition","volume":"86","author":"Lecun","year":"1998","journal-title":"Proc. IEEE"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"2369","DOI":"10.1056\/NEJMoa1408913","article-title":"Burden of clostridium difficile infection in the United States","volume":"372","author":"Lessa","year":"2015","journal-title":"N. Engl. J. Med"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"162","DOI":"10.4292\/wjgpt.v8.i3.162","article-title":"Phage therapy: an alternative to antibiotics in the age of multi-drug resistance","volume":"8","author":"Lin","year":"2017","journal-title":"World J. Gastrointest. Pharmacol. Ther"},{"key":"2023041408235147100_","author":"Liu","year":"2021"},{"key":"2023041408235147100_","author":"Mao","year":"2020"},{"key":"2023041408235147100_","author":"Marquet","year":"2020"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1038\/nrmicro.2017.30","article-title":"M\u00e9nage trois in the human gut: interactions between host, bacteria and phages","volume":"15","author":"Mirzaei","year":"2017","journal-title":"Nat. Rev. Microbiol"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1128\/microbiolspec.VMBF-0016-2015","article-title":"Mechanisms of antibiotic resistance","volume":"4","author":"Munita","year":"2016","journal-title":"Microbiol. Spectr"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3434237","article-title":"A comprehensive survey on word representation models: from classical to state-of-the-art word representation language models","volume":"20","author":"Naseem","year":"2021","journal-title":"Trans. Asian Low-Resour. Lang. Inf. Process"},{"key":"2023041408235147100_","author":"O\u2019Shea","year":"2015"},{"key":"2023041408235147100_","first-page":"2825","article-title":"Scikit-learn: machine learning in python","volume":"12","author":"Pedregosa","year":"2011","journal-title":"J. Mach. Learn. Res"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"684","DOI":"10.1093\/femsre\/fuaa017","article-title":"Current challenges and future opportunities of phage therapy","volume":"44","author":"Pires","year":"2020","journal-title":"FEMS Microbiol. Rev"},{"key":"2023041408235147100_","author":"Radford","year":"2018"},{"key":"2023041408235147100_","first-page":"8844","volume-title":"Proceedings of Machine Learning Research,","author":"Rao","year":"2021"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"64","DOI":"10.1007\/s40484-019-0187-4","article-title":"Identifying viruses from metagenomic data using deep learning","volume":"8","author":"Ren","year":"2020","journal-title":"Quant. Biol"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"607","DOI":"10.1038\/nrmicro2853","article-title":"Going viral: next-generation sequencing applied to phage populations in the human gut","volume":"10","author":"Reyes","year":"2012","journal-title":"Nat. Rev. Microbiol"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"828","DOI":"10.1038\/nrmicro2235","article-title":"Explaining microbial population genomics through phage predation","volume":"7","author":"Rodriguez-Valera","year":"2009","journal-title":"Nat. Rev. Microbiol"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1038\/nature08060","article-title":"Viruses manipulate the marine environment","volume":"459","author":"Rohwer","year":"2009","journal-title":"Nature"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"807","DOI":"10.1016\/S1359-6446(05)03441-0","article-title":"Phage therapy: an attractive option for dealing with antibiotic-resistant bacterial infections","volume":"10","author":"Sulakvelidze","year":"2005","journal-title":"Drug Discov. Today"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"429","DOI":"10.1016\/j.ins.2019.11.004","article-title":"Data imbalance in classification: experimental evaluation","volume":"513","author":"Thabtah","year":"2020","journal-title":"Inf. Sci"},{"key":"2023041408235147100_","first-page":"5998","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2023041408235147100_","first-page":"38","author":"Wolf","year":"2020"},{"key":"2023041408235147100_","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1109\/TBDATA.2018.2850013","article-title":"Network representation learning: a survey","volume":"6","author":"Zhang","year":"2018","journal-title":"IEEE Trans. Big Data"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btac509\/45277953\/btac509.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/38\/18\/4264\/49884929\/btac509.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/38\/18\/4264\/49884929\/btac509.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,25]],"date-time":"2023-11-25T03:20:08Z","timestamp":1700882408000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/38\/18\/4264\/6654586"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,3]]},"references-count":48,"journal-issue":{"issue":"18","published-print":{"date-parts":[[2022,9,15]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btac509","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2021.09.25.461359","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2022,9,15]]},"published":{"date-parts":[[2022,8,3]]}}}