{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T00:07:21Z","timestamp":1775606841399,"version":"3.50.1"},"reference-count":27,"publisher":"Oxford University Press (OUP)","issue":"17","license":[{"start":{"date-parts":[[2017,5,10]],"date-time":"2017-05-10T00:00:00Z","timestamp":1494374400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/about_us\/legal\/notices"}],"funder":[{"DOI":"10.13039\/501100000925","name":"NHMRC","doi-asserted-by":"publisher","award":["1092262"],"award-info":[{"award-number":["1092262"]}],"id":[{"id":"10.13039\/501100000925","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000163","name":"ARC","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000163","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,9,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>Evolutionary information in the form of a Position-Specific Scoring Matrix (PSSM) is a widely used and highly informative representation of protein sequences. Accordingly, PSSM-based feature descriptors have been successfully applied to improve the performance of various predictors of protein attributes. Even though a number of algorithms have been proposed in previous studies, there is currently no universal web server or toolkit available for generating this wide variety of descriptors. Here, we present POSSUM (Position-Specific Scoring matrix-based feature generator for machine learning), a versatile toolkit with an online web server that can generate 21 types of PSSM-based feature descriptors, thereby addressing a crucial need for bioinformaticians and computational biologists. We envisage that this comprehensive toolkit will be widely used as a powerful tool to facilitate feature extraction, selection, and benchmarking of machine learning-based models, thereby contributing to a more effective analysis and modeling pipeline for bioinformatics research.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>http:\/\/possum.erc.monash.edu\/.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btx302","type":"journal-article","created":{"date-parts":[[2017,5,10]],"date-time":"2017-05-10T03:10:16Z","timestamp":1494385816000},"page":"2756-2758","source":"Crossref","is-referenced-by-count":186,"title":["POSSUM: a bioinformatics toolkit for generating numerical sequence feature descriptors based on PSSM profiles"],"prefix":"10.1093","volume":"33","author":[{"given":"Jiawei","family":"Wang","sequence":"first","affiliation":[{"name":"Biomedicine Discovery Institute, Monash University, VIC, Australia"}]},{"given":"Bingjiao","family":"Yang","sequence":"additional","affiliation":[{"name":"College of Mechanical Engineering, Yanshan University, Qinhuangdao, China"}]},{"given":"Jerico","family":"Revote","sequence":"additional","affiliation":[{"name":"Biomedicine Discovery Institute, Monash University, VIC, Australia"}]},{"given":"Andr\u00e9","family":"Leier","sequence":"additional","affiliation":[{"name":"Informatics Institute and Department of Genetics, School of Medicine, University of Alabama at Birmingham, Birmingham, AL, USA"}]},{"given":"Tatiana T","family":"Marquez-Lago","sequence":"additional","affiliation":[{"name":"Informatics Institute and Department of Genetics, School of Medicine, University of Alabama at Birmingham, Birmingham, AL, USA"}]},{"given":"Geoffrey","family":"Webb","sequence":"additional","affiliation":[{"name":"Monash Centre for Data Science, Faculty of Information Technology, Monash University, VIC, Australia"}]},{"given":"Jiangning","family":"Song","sequence":"additional","affiliation":[{"name":"Biomedicine Discovery Institute, Monash University, VIC, Australia"},{"name":"Monash Centre for Data Science, Faculty of Information Technology, Monash University, VIC, Australia"},{"name":"ARC Centre of Excellence for Advanced Molecular Imaging, Monash University, VIC, Australia"}]},{"given":"Kuo-Chen","family":"Chou","sequence":"additional","affiliation":[{"name":"Gordon Life Science Institute, Boston, MA, USA"},{"name":"Center for Informational Biology, University of Electronic Science and Technology of China, Chengdu, China"},{"name":"Center of Excellence in Genomic Medicine Research (CEGMR), King Abdulaziz University, Jeddah, Saudi Arabia"}]},{"given":"Trevor","family":"Lithgow","sequence":"additional","affiliation":[{"name":"Biomedicine Discovery Institute, Monash University, VIC, Australia"}]}],"member":"286","published-online":{"date-parts":[[2017,5,10]]},"reference":[{"key":"2023020301105199100_btx302-B1","doi-asserted-by":"crossref","first-page":"bbw100.","DOI":"10.1093\/bib\/bbw100","article-title":"Comprehensive assessment and performance improvement of effector protein predictors for bacterial secretion systems III, IV and VI","author":"An","year":"2016","journal-title":"Brief Bioinform"},{"key":"2023020301105199100_btx302-B2","doi-asserted-by":"crossref","first-page":"960","DOI":"10.1093\/bioinformatics\/btt072","article-title":"Propy: a tool to generate various modes of Chou's PseAAC","volume":"29","author":"Cao","year":"2013","journal-title":"Bioinformatics"},{"key":"2023020301105199100_btx302-B3","doi-asserted-by":"crossref","first-page":"S6.","DOI":"10.1186\/1471-2105-9-S12-S6","article-title":"Predicting RNA-binding sites of proteins using support vector machines and evolutionary information","volume":"9","author":"Cheng","year":"2008","journal-title":"BMC Bioinform"},{"key":"2023020301105199100_btx302-B4","doi-asserted-by":"crossref","first-page":"236","DOI":"10.1016\/j.jtbi.2010.12.024","article-title":"Some remarks on protein attribute prediction and pseudo amino acid composition","volume":"273","author":"Chou","year":"2011","journal-title":"J. Theor. Biol"},{"key":"2023020301105199100_btx302-B5","doi-asserted-by":"crossref","first-page":"339","DOI":"10.1016\/j.bbrc.2007.06.027","article-title":"MemType-2L: a web server for predicting membrane proteins and their types by incorporating evolution information through Pse-PSSM","volume":"360","author":"Chou","year":"2007","journal-title":"Biochem. Biophys. Res. Commun"},{"key":"2023020301105199100_btx302-B6","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1016\/j.biochi.2013.09.013","article-title":"A protein structural classes prediction method based on predicted secondary structure and PSI-BLAST profile","volume":"97","author":"Ding","year":"2014","journal-title":"Biochimie"},{"key":"2023020301105199100_btx302-B7","doi-asserted-by":"crossref","first-page":"2655","DOI":"10.1093\/bioinformatics\/btp500","article-title":"A new taxonomy-based protein fold recognition approach based on autocross-covariance transformation","volume":"25","author":"Dong","year":"2009","journal-title":"Bioinformatics"},{"key":"2023020301105199100_btx302-B8","doi-asserted-by":"crossref","first-page":"308","DOI":"10.1109\/TCBB.2010.93","article-title":"On position-specific scoring matrix for protein function prediction","volume":"8","author":"Jeong","year":"2011","journal-title":"IEEE\/ACM Trans. Comput. Biol. Bioinform.\/IEEE, ACM"},{"key":"2023020301105199100_btx302-B9","first-page":"836","author":"Juan","year":"2009"},{"key":"2023020301105199100_btx302-B10","doi-asserted-by":"crossref","first-page":"1307","DOI":"10.1093\/bioinformatics\/btu820","article-title":"repDNA: a Python package to generate various modes of feature vectors for DNA sequences by incorporating user-defined physicochemical properties and sequence-order effects","volume":"31","author":"Liu","year":"2015","journal-title":"Bioinformatics"},{"key":"2023020301105199100_btx302-B11","doi-asserted-by":"crossref","first-page":"473","DOI":"10.1007\/s00438-015-1078-7","article-title":"repRNA: a web server for generating various feature vectors of RNA sequences","volume":"291","author":"Liu","year":"2016","journal-title":"Mol. Genet. Genom. MGG"},{"key":"2023020301105199100_btx302-B12","doi-asserted-by":"crossref","first-page":"W65","DOI":"10.1093\/nar\/gkv458","article-title":"Pse-in-One: a web server for generating various modes of pseudo components of DNA, RNA, and protein sequences","volume":"43","author":"Liu","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2023020301105199100_btx302-B100","doi-asserted-by":"crossref","first-page":"67","DOI":"10.4236\/ns.2017.94007","article-title":"Pse-in-One 2.0: an improved package of web servers for generating various modes of pseudo components of DNA, RNA, and protein sequences","volume":"9","author":"Liu","year":"2017","journal-title":"Natural Science"},{"key":"2023020301105199100_btx302-B13","doi-asserted-by":"crossref","first-page":"13338","DOI":"10.18632\/oncotarget.14524","article-title":"Pse-Analysis: a python package for DNA\/RNA and protein\/peptide sequence analysis based on pseudo components and kernel methods","volume":"8","author":"Liu","year":"2017","journal-title":"Oncotarget"},{"key":"2023020301105199100_btx302-B14","doi-asserted-by":"crossref","first-page":"1330","DOI":"10.1016\/j.biochi.2010.06.013","article-title":"Prediction of protein structural class for low-similarity sequences using support vector machine and PSI-BLAST profile","volume":"92","author":"Liu","year":"2010","journal-title":"Biochimie"},{"key":"2023020301105199100_btx302-B15","doi-asserted-by":"crossref","first-page":"1761","DOI":"10.1093\/bioinformatics\/btp302","article-title":"pGenTHREADER and pDomTHREADER: new methods for improved protein fold recognition and superfamily discrimination","volume":"25","author":"Lobley","year":"2009","journal-title":"Bioinformatics"},{"key":"2023020301105199100_btx302-B16","doi-asserted-by":"crossref","first-page":"44","DOI":"10.1109\/TNB.2013.2296050","article-title":"A tri-gram based feature extraction technique using linear probabilities of position specific scoring matrix for protein fold recognition","volume":"13","author":"Paliwal","year":"2014","journal-title":"IEEE Trans. Nanobiosci"},{"key":"2023020301105199100_btx302-B17","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1038\/nmeth.2340","article-title":"A large-scale evaluation of computational protein function prediction","volume":"10","author":"Radivojac","year":"2013","journal-title":"Nat. Methods"},{"key":"2023020301105199100_btx302-B18","doi-asserted-by":"crossref","first-page":"W385","DOI":"10.1093\/nar\/gkr284","article-title":"Update of PROFEAT: a web server for computing structural and physicochemical features of proteins and peptides from amino acid sequence","volume":"39","author":"Rao","year":"2011","journal-title":"Nucleic Acids Res"},{"key":"2023020301105199100_btx302-B19","first-page":"756","volume-title":"J Softw.","author":"Saini","year":"2016"},{"key":"2023020301105199100_btx302-B20","doi-asserted-by":"crossref","first-page":"386","DOI":"10.1016\/j.ab.2007.10.012","article-title":"PseAAC: a flexible web server for generating various kinds of protein pseudo amino acid composition","volume":"373","author":"Shen","year":"2008","journal-title":"Anal. Biochem"},{"key":"2023020301105199100_btx302-B21","doi-asserted-by":"crossref","first-page":"3555","DOI":"10.1093\/bioinformatics\/btv042","article-title":"protr\/ProtrWeb: R package and web server for generating various numerical representation schemes of protein sequences","volume":"31","author":"Xiao","year":"2015","journal-title":"Bioinformatics"},{"key":"2023020301105199100_btx302-B22","doi-asserted-by":"crossref","first-page":"W105","DOI":"10.1093\/nar\/gki359","article-title":"LOCSVMPSI: a web server for subcellular localization of eukaryotic proteins using SVM and profile of PSI-BLAST","volume":"33","author":"Xie","year":"2005","journal-title":"Nucleic Acids Res"},{"key":"2023020301105199100_btx302-B23","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1016\/j.ygeno.2013.05.006","article-title":"PPIevo: protein-protein interaction prediction from PSSM based evolutionary information","volume":"102","author":"Zahiri","year":"2013","journal-title":"Genomics"},{"key":"2023020301105199100_btx302-B24","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1016\/j.jtbi.2014.04.008","article-title":"Predict protein structural class for low-similarity sequences by evolutionary difference information into the general form of Chou's pseudo amino acid composition","volume":"355","author":"Zhang","year":"2014","journal-title":"J. Theor. Biol"},{"key":"2023020301105199100_btx302-B25","doi-asserted-by":"crossref","first-page":"634","DOI":"10.1080\/07391102.2011.672627","article-title":"Using principal component analysis and support vector machine to predict protein structural class for low-similarity sequences via PSSM","volume":"29","author":"Zhang","year":"2012","journal-title":"J. Biomol. Struct. Dyn"},{"key":"2023020301105199100_btx302-B26","doi-asserted-by":"crossref","first-page":"3135","DOI":"10.1093\/bioinformatics\/btt554","article-title":"Accurate prediction of bacterial type IV secreted effectors using amino acid composition and PSSM profiles","volume":"29","author":"Zou","year":"2013","journal-title":"Bioinformatics"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/17\/2756\/49040623\/bioinformatics_33_17_2756.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/17\/2756\/49040623\/bioinformatics_33_17_2756.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,3]],"date-time":"2023-02-03T01:11:44Z","timestamp":1675386704000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/33\/17\/2756\/3813283"}},"subtitle":[],"editor":[{"given":"John","family":"Hancock","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2017,5,10]]},"references-count":27,"journal-issue":{"issue":"17","published-print":{"date-parts":[[2017,9,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btx302","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2017,9,1]]},"published":{"date-parts":[[2017,5,10]]}}}