{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T04:44:52Z","timestamp":1776401092219,"version":"3.51.2"},"reference-count":36,"publisher":"Oxford University Press (OUP)","issue":"14","license":[{"start":{"date-parts":[[2018,3,8]],"date-time":"2018-03-08T00:00:00Z","timestamp":1520467200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/about_us\/legal\/notices"}],"funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["LP110200333"],"award-info":[{"award-number":["LP110200333"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["DP120104460"],"award-info":[{"award-number":["DP120104460"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["31701142"],"award-info":[{"award-number":["31701142"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Health and Medical Research Council of Australia","award":["APP1058540"],"award-info":[{"award-number":["APP1058540"]}]},{"DOI":"10.13039\/100000002","name":"National Institute of Allergy and Infectious Diseases of the National Institutes of Health","doi-asserted-by":"publisher","award":["R01 AI111965"],"award-info":[{"award-number":["R01 AI111965"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Major Inter-Disciplinary Research"},{"DOI":"10.13039\/501100001779","name":"Monash University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001779","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100011221","name":"UAB School of Medicine","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100011221","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,7,15]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>Structural and physiochemical descriptors extracted from sequence data have been widely used to represent sequences and predict structural, functional, expression and interaction profiles of proteins and peptides as well as DNAs\/RNAs. Here, we present iFeature, a versatile Python-based toolkit for generating various numerical feature representation schemes for both protein and peptide sequences. iFeature is capable of calculating and extracting a comprehensive spectrum of 18 major sequence encoding schemes that encompass 53 different types of feature descriptors. It also allows users to extract specific amino acid properties from the AAindex database. Furthermore, iFeature integrates 12 different types of commonly used feature clustering, selection and dimensionality reduction algorithms, greatly facilitating training, analysis and benchmarking of machine-learning models. The functionality of iFeature is made freely available via an online web server and a stand-alone toolkit.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>http:\/\/iFeature.erc.monash.edu\/; https:\/\/github.com\/Superzchen\/iFeature\/.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/bty140","type":"journal-article","created":{"date-parts":[[2018,3,7]],"date-time":"2018-03-07T04:10:38Z","timestamp":1520395838000},"page":"2499-2502","source":"Crossref","is-referenced-by-count":632,"title":["<i>iFeature<\/i>: a Python package and web server for features extraction and selection from protein and peptide sequences"],"prefix":"10.1093","volume":"34","author":[{"given":"Zhen","family":"Chen","sequence":"first","affiliation":[{"name":"School of Basic Medical Science, Qingdao University, 38 Dengzhou Road, Qingdao, China"}]},{"given":"Pei","family":"Zhao","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Cotton Biology, Institute of Cotton Research of Chinese Academy of Agricultural Sciences (CAAS), Anyang, China"}]},{"given":"Fuyi","family":"Li","sequence":"additional","affiliation":[{"name":"Biomedicine Discovery Institute and Department of Biochemistry and Molecular Biology, Monash University, Melbourne, VIC, Australia"}]},{"given":"Andr\u00e9","family":"Leier","sequence":"additional","affiliation":[{"name":"Department of Genetics, School of Medicine, University of Alabama at Birmingham, AL, USA"},{"name":"Department of Cell, Developmental and Integrative Biology, School of Medicine, University of Alabama at Birmingham, AL, USA"}]},{"given":"Tatiana T","family":"Marquez-Lago","sequence":"additional","affiliation":[{"name":"Department of Genetics, School of Medicine, University of Alabama at Birmingham, AL, USA"},{"name":"Department of Cell, Developmental and Integrative Biology, School of Medicine, University of Alabama at Birmingham, AL, USA"}]},{"given":"Yanan","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Image Processing and Pattern Recognition, Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Geoffrey I","family":"Webb","sequence":"additional","affiliation":[{"name":"Monash Centre for Data Science, Faculty of Information Technology, Monash University, Melbourne, VIC, Australia"}]},{"given":"A Ian","family":"Smith","sequence":"additional","affiliation":[{"name":"Biomedicine Discovery Institute and Department of Biochemistry and Molecular Biology, Monash University, Melbourne, VIC, Australia"}]},{"given":"Roger J","family":"Daly","sequence":"additional","affiliation":[{"name":"Biomedicine Discovery Institute and Department of Biochemistry and Molecular Biology, Monash University, Melbourne, VIC, Australia"}]},{"given":"Kuo-Chen","family":"Chou","sequence":"additional","affiliation":[{"name":"Gordon Life Science Institute, Boston, MA, USA"},{"name":"Center for Informational Biology, School of Life Science and Technology, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8031-9086","authenticated-orcid":false,"given":"Jiangning","family":"Song","sequence":"additional","affiliation":[{"name":"Biomedicine Discovery Institute and Department of Biochemistry and Molecular Biology, Monash University, Melbourne, VIC, Australia"},{"name":"Monash Centre for Data Science, Faculty of Information Technology, Monash University, Melbourne, VIC, Australia"}]}],"member":"286","published-online":{"date-parts":[[2018,3,8]]},"reference":[{"key":"2023012713012398600_bty140-B1","doi-asserted-by":"crossref","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","article-title":"Gapped BLAST and PSI-BLAST: a new generation of protein database search programs","volume":"25","author":"Altschul","year":"1997","journal-title":"Nucleic Acids Res"},{"key":"2023012713012398600_bty140-B2","doi-asserted-by":"crossref","first-page":"1714","DOI":"10.1093\/bioinformatics\/btq267","article-title":"Prediction of protease substrates using sequence and structure features","volume":"26","author":"Barkan","year":"2010","journal-title":"Bioinformatics"},{"key":"2023012713012398600_bty140-B3","doi-asserted-by":"crossref","DOI":"10.1515\/9781400874668","volume-title":"Adaptive Control Processes: A Guided Tour","author":"Bellman","year":"1961"},{"key":"2023012713012398600_bty140-B4","doi-asserted-by":"crossref","first-page":"23262","DOI":"10.1074\/jbc.M401932200","article-title":"Classification of nuclear receptors based on amino acid composition and dipeptide composition","volume":"279","author":"Bhasin","year":"2004","journal-title":"J. Biol. Chem"},{"key":"2023012713012398600_bty140-B5","doi-asserted-by":"crossref","first-page":"960","DOI":"10.1093\/bioinformatics\/btt072","article-title":"propy: a tool to generate various modes of Chou\u2019s PseAAC","volume":"29","author":"Cao","year":"2013","journal-title":"Bioinformatics"},{"key":"2023012713012398600_bty140-B6","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1093\/bioinformatics\/btu624","article-title":"Rcpi: r \/Bioconductor package to generate various descriptors of proteins, compounds and their interactions","volume":"31","author":"Cao","year":"2015","journal-title":"Bioinformatics"},{"key":"2023012713012398600_bty140-B7","doi-asserted-by":"crossref","first-page":"1614","DOI":"10.1093\/bioinformatics\/btt196","article-title":"Incorporating key position and amino acid residue features to identify general and species-specific Ubiquitin conjugation sites","volume":"29","author":"Chen","year":"2013","journal-title":"Bioinformatics"},{"key":"2023012713012398600_bty140-B8","doi-asserted-by":"crossref","first-page":"1461","DOI":"10.1016\/j.bbapap.2013.04.006","article-title":"hCKSAAP_UbSite: improved prediction of human ubiquitination sites by exploiting amino acid pattern and properties","volume":"1834","author":"Chen","year":"2013","journal-title":"Biochim. Biophys. Acta"},{"key":"2023012713012398600_bty140-B9","doi-asserted-by":"crossref","first-page":"477","DOI":"10.1006\/bbrc.2000.3815","article-title":"Prediction of protein subcellular locations by incorporating quasi-sequence-order effect","volume":"278","author":"Chou","year":"2000","journal-title":"Biochem. Biophys. Res. Commun"},{"key":"2023012713012398600_bty140-B10","doi-asserted-by":"crossref","first-page":"246","DOI":"10.1002\/prot.1035","article-title":"Prediction of protein cellular attributes using pseudo-amino acid composition","volume":"43","author":"Chou","year":"2001","journal-title":"Proteins"},{"key":"2023012713012398600_bty140-B11","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1093\/bioinformatics\/bth466","article-title":"Using amphiphilic pseudo amino acid composition to predict enzyme subfamily classes","volume":"21","author":"Chou","year":"2005","journal-title":"Bioinformatics"},{"key":"2023012713012398600_bty140-B12","doi-asserted-by":"crossref","first-page":"236","DOI":"10.1016\/j.jtbi.2010.12.024","article-title":"Some remarks on protein attribute prediction and pseudo amino acid composition","volume":"273","author":"Chou","year":"2011","journal-title":"J. Theor. Biol"},{"key":"2023012713012398600_bty140-B13","doi-asserted-by":"crossref","first-page":"1236","DOI":"10.1016\/j.bbrc.2004.06.073","article-title":"Prediction of protein subcellular locations by GO-FunD-PseAA predictor","volume":"320","author":"Chou","year":"2004","journal-title":"Biochem. Biophys. Res. Commun"},{"key":"2023012713012398600_bty140-B14","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1038\/nprot.2007.494","article-title":"Cell-PLoc: a package of Web servers for predicting subcellular localization of proteins in various organisms","volume":"3","author":"Chou","year":"2008","journal-title":"Nat. Protoc"},{"key":"2023012713012398600_bty140-B15","first-page":"45","article-title":"Prediction of the secondary structure of proteins from their amino acid sequence","volume":"47","author":"Chou","year":"1978","journal-title":"Adv. Enzymol. Relat. Areas Mol. Biol"},{"key":"2023012713012398600_bty140-B16","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1016\/j.ab.2012.03.015","article-title":"PseAAC-Builder: a cross-platform stand-alone program for generating various special Chou\u2019s pseudo-amino acid compositions","volume":"425","author":"Du","year":"2012","journal-title":"Anal. Biochem"},{"key":"2023012713012398600_bty140-B17","doi-asserted-by":"crossref","first-page":"3495","DOI":"10.3390\/ijms15033495","article-title":"PseAAC-General: fast building various modes of general form of Chou\u2019s pseudo-amino acid composition for large-scale protein datasets","volume":"15","author":"Du","year":"2014","journal-title":"Int. J. Mol. Sci"},{"key":"2023012713012398600_bty140-B18","doi-asserted-by":"crossref","first-page":"8700","DOI":"10.1073\/pnas.92.19.8700","article-title":"Prediction of protein folding class using global description of amino acid sequence","volume":"92","author":"Dubchak","year":"1995","journal-title":"Proc. Natl. Acad. Sci. USA"},{"key":"2023012713012398600_bty140-B19","doi-asserted-by":"crossref","first-page":"401","DOI":"10.1002\/(SICI)1097-0134(19990601)35:4<401::AID-PROT3>3.0.CO;2-K","article-title":"Recognition of a protein fold in the context of the Structural Classification of Proteins (SCOP) classification","volume":"35","author":"Dubchak","year":"1999","journal-title":"Proteins"},{"key":"2023012713012398600_bty140-B20","first-page":"D202","article-title":"AAindex: amino acid index database, progress report 2008","volume":"36 (Database issue)","author":"Kawashima","year":"2008","journal-title":"Nucleic Acids Res"},{"key":"2023012713012398600_bty140-B21","doi-asserted-by":"crossref","first-page":"86","DOI":"10.1093\/bib\/bbk007","article-title":"Machine learning in bioinformatics","volume":"7","author":"Larranaga","year":"2006","journal-title":"Brief. Bioinform"},{"key":"2023012713012398600_bty140-B22","doi-asserted-by":"crossref","first-page":"e17331","DOI":"10.1371\/journal.pone.0017331","article-title":"Incorporating distant sequence features and radial basis function networks to identify ubiquitin conjugation sites","volume":"6","author":"Lee","year":"2011","journal-title":"PLoS One"},{"key":"2023012713012398600_bty140-B23","doi-asserted-by":"crossref","first-page":"W32","DOI":"10.1093\/nar\/gkl305","article-title":"PROFEAT: a web server for computing structural and physicochemical features of proteins and peptides from amino acid sequence","volume":"34","author":"Li","year":"2006","journal-title":"Nucleic Acids Res"},{"key":"2023012713012398600_bty140-B24","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1038\/nrg3920","article-title":"Machine learning applications in genetics and genomics","volume":"16","author":"Libbrecht","year":"2015","journal-title":"Nat. Rev. Genet"},{"key":"2023012713012398600_bty140-B25","doi-asserted-by":"crossref","first-page":"552","DOI":"10.2174\/1573406413666170515120507","article-title":"iPGK-PseAAC: identify lysine phosphoglycerylation sites in proteins by incorporating four different tiers of amino acid pairwise coupling information into the general PseAAC","volume":"13","author":"Liu","year":"2017","journal-title":"Med. Chem"},{"key":"2023012713012398600_bty140-B26","doi-asserted-by":"crossref","first-page":"W385","DOI":"10.1093\/nar\/gkr284","article-title":"Update of PROFEAT: a web server for computing structural and physicochemical features of proteins and peptides from amino acid sequence","volume":"39","author":"Rao","year":"2011","journal-title":"Nucleic Acids Res"},{"key":"2023012713012398600_bty140-B27","doi-asserted-by":"crossref","first-page":"e1000636","DOI":"10.1371\/journal.pcbi.1000636","article-title":"Combining structure and sequence information allows automated prediction of substrate specificities within enzyme families","volume":"6","author":"Rottig","year":"2010","journal-title":"PLoS Comput. Biol"},{"key":"2023012713012398600_bty140-B28","doi-asserted-by":"crossref","first-page":"648","DOI":"10.1089\/omi.2015.0095","article-title":"Harnessing computational biology for exact linear B-cell epitope prediction: a novel amino acid composition-based feature descriptor","volume":"19","author":"Saravanan","year":"2015","journal-title":"Omics"},{"key":"2023012713012398600_bty140-B29","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1016\/S0006-3495(94)80782-9","article-title":"The rational design of amino acid sequences by artificial neural networks and simulated molecular evolution: de novo design of an idealized leader peptidase cleavage site","volume":"66","author":"Schneider","year":"1994","journal-title":"Biophys. J"},{"key":"2023012713012398600_bty140-B30","doi-asserted-by":"crossref","first-page":"4337","DOI":"10.1073\/pnas.0607879104","article-title":"Predicting protein-protein interactions based only on sequences information","volume":"104","author":"Shen","year":"2007","journal-title":"Proc. Natl. Acad. Sci. USA"},{"key":"2023012713012398600_bty140-B31","doi-asserted-by":"crossref","first-page":"386","DOI":"10.1016\/j.ab.2007.10.012","article-title":"PseAAC: a flexible web server for generating various kinds of protein pseudo amino acid composition","volume":"373","author":"Shen","year":"2008","journal-title":"Anal. Biochem"},{"key":"2023012713012398600_bty140-B32","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1002\/ajpa.20250","article-title":"Population structure inferred by local spatial autocorrelation: an example from an Amerindian tribal population","volume":"129","author":"Sokal","year":"2006","journal-title":"Am. J. Phys. Anthropol"},{"key":"2023012713012398600_bty140-B33","doi-asserted-by":"crossref","first-page":"752","DOI":"10.1093\/bioinformatics\/btq043","article-title":"Cascleave: towards more accurate prediction of caspase substrate cleavage sites","volume":"26","author":"Song","year":"2010","journal-title":"Bioinformatics"},{"key":"2023012713012398600_bty140-B34","doi-asserted-by":"crossref","first-page":"310","DOI":"10.1186\/1471-2105-9-310","article-title":"Computational identification of ubiquitylation sites from protein sequences","volume":"9","author":"Tung","year":"2008","journal-title":"BMC Bioinformatics"},{"key":"2023012713012398600_bty140-B35","doi-asserted-by":"crossref","first-page":"1857","DOI":"10.1093\/bioinformatics\/btv042","article-title":"protr\/ProtrWeb: r package and web server for generating various numerical representation schemes of protein sequences","volume":"31","author":"Xiao","year":"2015","journal-title":"Bioinformatics"},{"key":"2023012713012398600_bty140-B36","doi-asserted-by":"crossref","first-page":"122","DOI":"10.1093\/bioinformatics\/btw564","article-title":"PseKRAAC: a flexible web server for generating pseudo K-tuple reduced amino acids composition","volume":"33","author":"Zuo","year":"2017","journal-title":"Bioinformatics"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/34\/14\/2499\/48917581\/bioinformatics_34_14_2499.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/34\/14\/2499\/48917581\/bioinformatics_34_14_2499.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T13:52:34Z","timestamp":1674827554000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/34\/14\/2499\/4924718"}},"subtitle":[],"editor":[{"given":"Alfonso","family":"Valencia","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2018,3,8]]},"references-count":36,"journal-issue":{"issue":"14","published-print":{"date-parts":[[2018,7,15]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/bty140","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2018,7,15]]},"published":{"date-parts":[[2018,3,8]]}}}