{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,5]],"date-time":"2025-07-05T04:01:46Z","timestamp":1751688106172,"version":"3.41.0"},"reference-count":66,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,5,3]],"date-time":"2018-05-03T00:00:00Z","timestamp":1525305600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001871","name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","doi-asserted-by":"publisher","award":["SFRH\/BPD\/92978\/2013"],"award-info":[{"award-number":["SFRH\/BPD\/92978\/2013"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]},{"name":"European Regional Development Fund (PT) and FCT funds","award":["UID\/Multi\/04423\/2013, PTDC\/AAG-GLO\/6887\/2014"],"award-info":[{"award-number":["UID\/Multi\/04423\/2013, PTDC\/AAG-GLO\/6887\/2014"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1186\/s12859-018-2148-8","type":"journal-article","created":{"date-parts":[[2018,5,3]],"date-time":"2018-05-03T17:25:51Z","timestamp":1525368351000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Surveying alignment-free features for Ortholog detection in related yeast proteomes by using supervised big data classifiers"],"prefix":"10.1186","volume":"19","author":[{"given":"Deborah","family":"Galpert","sequence":"first","affiliation":[]},{"given":"Alberto","family":"Fern\u00e1ndez","sequence":"additional","affiliation":[]},{"given":"Francisco","family":"Herrera","sequence":"additional","affiliation":[]},{"given":"Agostinho","family":"Antunes","sequence":"additional","affiliation":[]},{"given":"Reinaldo","family":"Molina-Ruiz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9908-2418","authenticated-orcid":false,"given":"Guillermin","family":"Ag\u00fcero-Chapin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,5,3]]},"reference":[{"issue":"8","key":"2148_CR1","doi-asserted-by":"publisher","first-page":"434","DOI":"10.1016\/S0169-5347(01)02206-6","volume":"16","author":"DP Mindell","year":"2001","unstructured":"Mindell DP, Meyer A. Homology evolving. Trends Ecol Evol. 2001;16(8):434\u201340.","journal-title":"Trends Ecol Evol"},{"issue":"11","key":"2148_CR2","doi-asserted-by":"publisher","first-page":"539","DOI":"10.1016\/j.tig.2008.08.009","volume":"24","author":"A Kuzniar","year":"2008","unstructured":"Kuzniar A, van Ham RCHJ, Pongor S, Leunissen JAM. The quest for orthologs: finding the corresponding gene across genomes. Trends Genet. 2008;24(11):539\u201351.","journal-title":"Trends Genet"},{"key":"2148_CR3","doi-asserted-by":"publisher","first-page":"236","DOI":"10.1016\/j.jtbi.2010.12.024","volume":"273","author":"K-C Chou","year":"2011","unstructured":"Chou K-C. Some remarks on protein attribute prediction and pseudo amino acid composition. J Theor Biol. 2011;273:236\u201347.","journal-title":"J Theor Biol"},{"issue":"4","key":"2148_CR4","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1093\/bioinformatics\/btg005","volume":"19","author":"S Vinga","year":"2003","unstructured":"Vinga S, Almeida J. Alignment-free sequence comparison\u2014a review. Bioinformatics. 2003;19(4):513\u201323.","journal-title":"Bioinformatics"},{"issue":"3","key":"2148_CR5","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1093\/bib\/bbu005","volume":"15","author":"S Vinga","year":"2014","unstructured":"Vinga S. Editorial: alignment-free methods in computational biology. Brief Bioinform. 2014;15(3):341\u20132.","journal-title":"Brief Bioinform"},{"key":"2148_CR6","doi-asserted-by":"publisher","first-page":"99","DOI":"10.2307\/2412448","volume":"19","author":"WM Fitch","year":"1970","unstructured":"Fitch WM. Distinguishing homologous from analogous proteins. Syst Zool Syst Biol. 1970;19:99\u2013113.","journal-title":"Syst Zool Syst Biol"},{"key":"2148_CR7","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1146\/annurev.genet.39.073003.114725","volume":"39","author":"EV Koonin","year":"2005","unstructured":"Koonin EV. Orthologs, paralogs, and evolutionary genomics. Annu Rev Genet. 2005;39:309\u201338.","journal-title":"Annu Rev Genet"},{"issue":"5338","key":"2148_CR8","doi-asserted-by":"publisher","first-page":"631","DOI":"10.1126\/science.278.5338.631","volume":"278","author":"RL Tatusov","year":"1997","unstructured":"Tatusov RL, Koonin EV, Lipman DJ. A genomic perspective on protein families. Science. 1997;278(5338):631\u20137.","journal-title":"Science"},{"key":"2148_CR9","doi-asserted-by":"publisher","first-page":"1041","DOI":"10.1006\/jmbi.2000.5197","volume":"314","author":"M Remm","year":"2001","unstructured":"Remm M, Storm CEV, Sonnhammer ELL. Automatic clustering of Orthologs and in-paralogs from pairwise species comparisons. J Mol Biol. 2001;314:1041\u201352.","journal-title":"J Mol Biol"},{"key":"2148_CR10","doi-asserted-by":"publisher","first-page":"2178","DOI":"10.1101\/gr.1224503","volume":"13","author":"L Li","year":"2003","unstructured":"Li L, Stoeckert CJ, Roos DS. OrthoMCL: identification of Ortholog groups for eukaryotic genomes. Genome Res. 2003;13:2178\u201389.","journal-title":"Genome Res"},{"issue":"13","key":"2148_CR11","doi-asserted-by":"publisher","first-page":"1710","DOI":"10.1093\/bioinformatics\/btg213","volume":"19","author":"DP Wall","year":"2003","unstructured":"Wall DP, Fraser HB, Hirsh AE. Detecting putative orthologs. Bioinformatics. 2003;19(13):1710\u20131.","journal-title":"Bioinformatics"},{"issue":"suppl_1","key":"2148_CR12","first-page":"D289","volume":"39","author":"AM Altenhoff","year":"2010","unstructured":"Altenhoff AM, Schneider A, Gonnet GH, Dessimoz C. OMA 2011: orthology inference among 1000 complete genomes. Nucleic Acids Res. 2010;39(suppl_1):D289\u201394.","journal-title":"Nucleic Acids Res"},{"issue":"6","key":"2148_CR13","doi-asserted-by":"publisher","first-page":"699","DOI":"10.1093\/bioinformatics\/btk040","volume":"22","author":"JC Chiu","year":"2006","unstructured":"Chiu JC, Lee EK, Egan MG, Sarkar IN, Coruzzi GM, DeSalle R. OrthologID: automation of genome-scale ortholog identification within a parsimony framework. Bioinformatics. 2006;22(6):699\u2013707.","journal-title":"Bioinformatics"},{"key":"2148_CR14","doi-asserted-by":"publisher","first-page":"D190","DOI":"10.1093\/nar\/gkp951","volume":"38","author":"J Muller","year":"2010","unstructured":"Muller J, Szklarczyk D, Julien P, Letunic IA, Roth M, Kuhn S, Powell C, Mering TV, Doerks LJ, Bork APJ. eggNOG v2.0: extending the evolutionary genealogy of genes with enhanced non-supervised orthologous groups, species and functional annotations. Nucleic Acids Res. 2010;38:D190\u20135.","journal-title":"Nucleic Acids Res"},{"key":"2148_CR15","volume-title":"Computational comparative genomics: genes, regulation, evolution","author":"MK Kamvysselis","year":"2003","unstructured":"Kamvysselis MK. Computational comparative genomics: genes, regulation, evolution. Massachusetts: Massachusetts Institute Technol; 2003."},{"key":"2148_CR16","doi-asserted-by":"crossref","unstructured":"Towfic F, Honavar V: Detection of Gene Orthology Based On Protein-Protein Interaction Networks. In: IEEE International Conference on Bioinformatics and Biomedicine, BIBM: 2009; Washington DC, USA.","DOI":"10.1109\/BIBM.2009.85"},{"issue":"1","key":"2148_CR17","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1186\/1471-2105-11-10","volume":"11","author":"G Shi","year":"2010","unstructured":"Shi G, Zhang L, Jiang T. MSOAR 2.0: incorporating tandem duplications into ortholog assignment based on genome rearrangement. BMC Bioinformatics. 2010;11(1):10.","journal-title":"BMC Bioinformatics"},{"key":"2148_CR18","doi-asserted-by":"publisher","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","volume":"25","author":"SF Altschul","year":"1997","unstructured":"Altschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ, Gapped BLAST. PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res. 1997;25:3389\u2013402.","journal-title":"Nucleic Acids Res"},{"issue":"6","key":"2148_CR19","doi-asserted-by":"publisher","first-page":"703","DOI":"10.1093\/bioinformatics\/bti045","volume":"21","author":"XH Zheng","year":"2005","unstructured":"Zheng XH, Lu F, Wang Z-Y, Zhong F, Hoover J, Mural R. Using shared genomic synteny and shared protein functions to enhance the identification of orthologous gene pairs. Bioinformatics. 2005;21(6):703\u201310.","journal-title":"Bioinformatics"},{"issue":"4","key":"2148_CR20","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1109\/TCBB.2005.48","volume":"2","author":"X Chen","year":"2005","unstructured":"Chen X, Zheng J, Fu Z, Nan P, Zhong Y, Lonardi S, Jiang T. Assignment of orthologous genes via genome rearrangement. IEEE ACM Trans Comput Biol Bioinform. 2005;2(4):302\u201315.","journal-title":"IEEE ACM Trans Comput Biol Bioinform"},{"issue":"Suppl 3","key":"2148_CR21","doi-asserted-by":"publisher","first-page":"S7","DOI":"10.1186\/1471-2105-11-S3-S7","volume":"11","author":"F Towfic","year":"2010","unstructured":"Towfic F, VanderPIas S, OIiver CA, Couture O, TuggIe CK, GreenIee MHW, Honavar V. Detection of gene orthology from gene co-expression and protein interaction networks. BMC Bioinformatics. 2010;11(Suppl 3):S7.","journal-title":"BMC Bioinformatics"},{"key":"2148_CR22","doi-asserted-by":"crossref","unstructured":"Chen TW, Wu TH, Ng WV: DODO: an efficient orthologous genes assignment tool based on domain architectures. Domain based ortholog detection. BMC Bioinformatics 2010, 11(Suppl 7)(S6).","DOI":"10.1186\/1471-2105-11-S7-S6"},{"key":"2148_CR23","unstructured":"Rasmussen M, Kellis M. Multi-bus: an algorithm for resolving multi-species gene correspondence and gene family relationships. CSAIL Res. 2005."},{"issue":"4","key":"2148_CR24","doi-asserted-by":"publisher","first-page":"629","DOI":"10.1534\/g3.115.017095","volume":"5","author":"MC Maher","year":"2015","unstructured":"Maher MC, Hernandez RD. Rock, paper, scissors: harnessing complementarity in Ortholog detection methods improves comparative genomic inference. G3: Genes|Genomes|Genetics. 2015;5(4):629\u201338.","journal-title":"G3: Genes|Genomes|Genetics"},{"issue":"1","key":"2148_CR25","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1000262","volume":"5","author":"AM Altenhoff","year":"2009","unstructured":"Altenhoff AM, Dessimoz C. Phylogenetic and functional assessment of Orthologs inference projects and methods. PLoS Comput Biol. 2009;5(1):e1000262.","journal-title":"PLoS Comput Biol"},{"issue":"4","key":"2148_CR26","doi-asserted-by":"publisher","first-page":"R31","DOI":"10.1186\/gb-2006-7-4-r31","volume":"7","author":"T Hulsen","year":"2006","unstructured":"Hulsen T, Huynen MA, de Vlieg J, Groenen PMA. Benchmarking ortholog identification methods using functional genomics data. Genome Biol. 2006;7(4):R31.","journal-title":"Genome Biol"},{"issue":"4","key":"2148_CR27","doi-asserted-by":"publisher","first-page":"e18755","DOI":"10.1371\/journal.pone.0018755","volume":"6","author":"L Salichos","year":"2011","unstructured":"Salichos L, Rokas A. Evaluating ortholog prediction algorithms in a yeast model clade. PLoS One. 2011;6(4):e18755.","journal-title":"PLoS One"},{"key":"2148_CR28","doi-asserted-by":"publisher","first-page":"1456","DOI":"10.1101\/gr.3672305","volume":"15","author":"KP Byrne","year":"2005","unstructured":"Byrne KP, Wolfe KH. The yeast gene order browser: combining curated homology and syntenic context reveals gene fate in polyploid species. Genome Res. 2005;15:1456\u201361.","journal-title":"Genome Res"},{"key":"2148_CR29","doi-asserted-by":"publisher","first-page":"1046","DOI":"10.1038\/35082561","volume":"411","author":"AE Hirsh","year":"2001","unstructured":"Hirsh AE, Fraser HB. Protein dispensability and rate of evolution. Nature. 2001;411:1046\u20139.","journal-title":"Nature"},{"issue":"14","key":"2148_CR30","doi-asserted-by":"publisher","first-page":"e9","DOI":"10.1093\/bioinformatics\/btl213","volume":"22","author":"A Alexeyenko","year":"2006","unstructured":"Alexeyenko A, Tamas I, Liu G, Sonnhammer ELL. Automatic clustering of orthologs and inparalogs shared by multiple proteomes. Bioinformatics. 2006;22(14):e9\u2013e15.","journal-title":"Bioinformatics"},{"issue":"21","key":"2148_CR31","doi-asserted-by":"publisher","first-page":"2993","DOI":"10.1093\/bioinformatics\/btu492","volume":"30","author":"ELL Sonnhammer","year":"2014","unstructured":"Sonnhammer ELL, Gabald\u00f3n T, Sousa da Silva AW, Martin M, Robinson-Rechavi M, Boeckmann B, Thomas PD, Dessimoz C. Quest for Orthologs consortium.(2014) big data and other challenges in the quest for orthologs. Bioinformatics. 2014;30(21):2993\u20138.","journal-title":"Bioinformatics"},{"key":"2148_CR32","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1002\/widm.1134","volume":"4","author":"A Fern\u00e1ndez","year":"2014","unstructured":"Fern\u00e1ndez A, Sd R, L\u00f3pez V, Bawakid A, MJd J, Ben\u00edtez JM, Herrera F. Big data with cloud computing: an insight on the computing environment, MapReduce, and programming frameworks. WIREs Data Min Knowl Discovery. 2014;4:380\u2013409.","journal-title":"WIREs Data Min Knowl Discovery"},{"key":"2148_CR33","unstructured":"Dean J, Ghemawat S: MapReduce: simplified data processing on large clusters. In: OSDI\u201904: Proceedings of the 6th Symposium on Operating System Design and Implementation,; San Francisco, California, USA. USENIX association 2004: 137\u2013150."},{"key":"2148_CR34","unstructured":"Apache Spark. 2017: Lightning-fast cluster computing [ https:\/\/spark.apache.org\/ ]. Accessed May 2016."},{"key":"2148_CR35","unstructured":"Apache Flink. 2017 [ https:\/\/flink.apache.org\/ ]. Accessed May 2016."},{"key":"2148_CR36","doi-asserted-by":"crossref","unstructured":"Galpert D, R\u00edo Sd, Herrera F, Ancede-Gallardo E, Antunes A, Ag\u00fcero-Chapin G: An effective big data supervised imbalanced classification approach for Ortholog detection in related yeast species. In: BioMed Research International. vol. 2015, Article ID 748681; 2015: 12 pages.","DOI":"10.1155\/2015\/748681"},{"key":"2148_CR37","first-page":"1","volume-title":"9th USENIX conference on networked systems design and implementation; San Jose, CA","author":"M Zaharia","year":"2012","unstructured":"Zaharia M, Chowdhury M, Das T, Dave A, Ma J, McCauley M, Franklin M, Shenker S, Stoica I. Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing. In: 9th USENIX conference on networked systems design and implementation; San Jose, CA; 2012. p. 1\u201314."},{"issue":"6","key":"2148_CR38","doi-asserted-by":"publisher","first-page":"e44","DOI":"10.1093\/nar\/gkr1261","volume":"40","author":"K Mahmood","year":"2012","unstructured":"Mahmood K, Webb GI, Song J, Whisstock JC, Konagurthu AS. Efficient large-scale protein sequence comparison and gene matching to identify orthologs and co-orthologs. Nucleic Acids Res. 2012;40(6):e44.","journal-title":"Nucleic Acids Res"},{"issue":"9","key":"2148_CR39","doi-asserted-by":"publisher","first-page":"1396","DOI":"10.1093\/bioinformatics\/btv006","volume":"31","author":"I Borozan","year":"2015","unstructured":"Borozan I, Watt S, Ferretti V. Integrating alignment-based and alignment-free sequence similarity measures for biological sequence classification. Bioinformatics. 2015;31(9):1396\u2013404.","journal-title":"Bioinformatics"},{"issue":"suppl_2","key":"2148_CR40","doi-asserted-by":"publisher","first-page":"W385","DOI":"10.1093\/nar\/gkr284","volume":"39","author":"HB Rao","year":"2011","unstructured":"Rao HB, Zhu F, Yang GB, Li ZR, Chen YZ. Update of PROFEAT: a web server for computing structural and physicochemical features of proteins and peptides from amino acid sequence. Nucleic Acids Res. 2011;39(suppl_2):W385\u201390.","journal-title":"Nucleic Acids Res"},{"key":"2148_CR41","unstructured":"Molina R, Ag\u00fcero-Chapin G, P\u00e9rez-Gonz\u00e1lez M: TI2BioP (Topological Indices to BioPolymers) version 2.0. Molecular Simulation and Drug Design (MSDD), Chemical Bioactives Center, Central University of Las Villas, Cuba 2011."},{"issue":"1","key":"2148_CR42","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1186\/1471-2105-8-300","volume":"8","author":"SA Ong","year":"2007","unstructured":"Ong SA, Lin HH, Chen YZ, Li ZR, Cao Z. Efficacy of different protein descriptors in predicting protein functional families. BMC Bioinformatics. 2007;8(1):300.","journal-title":"BMC Bioinformatics"},{"key":"2148_CR43","doi-asserted-by":"publisher","first-page":"19","DOI":"10.4137\/BBI.S315","volume":"1","author":"ZQ Tang","year":"2007","unstructured":"Tang ZQ, Lin HH, Zhang HL, Han LY, Chen X, Chen YZ. Prediction of functional class of proteins and peptides irrespective of sequence homology by support vector machines. Bioinformatics Biol Insights. 2007;1:19.","journal-title":"Bioinformatics Biol Insights"},{"issue":"7","key":"2148_CR44","doi-asserted-by":"publisher","first-page":"e65926","DOI":"10.1371\/journal.pone.0065926","volume":"8","author":"G Ag\u00fcero-Chapin","year":"2013","unstructured":"Ag\u00fcero-Chapin G, Molina-Ruiz R, Maldonado E, de la Riva G, S\u00e1nchez-Rodr\u00edguez A, Vasconcelos V, Antunes A. Exploring the adenylation domain repertoire of nonribosomal peptide synthetases using an ensemble of sequence-search methods. PLoS One. 2013 Jul 16;8(7):e65926.","journal-title":"PLoS One"},{"issue":"2","key":"2148_CR45","doi-asserted-by":"publisher","first-page":"431","DOI":"10.1007\/s00726-010-0653-9","volume":"40","author":"G Aguero-Chapin","year":"2011","unstructured":"Aguero-Chapin G, Perez-Machado G, Molina-Ruiz R, Perez-Castillo Y, Morales-Helguera A, Vasconcelos V, Antunes A. TI2BioP: topological indices to BioPolymers. Its practical use to unravel cryptic bacteriocin-like domains. Amino Acids. 2011;40(2):431\u201342.","journal-title":"Amino Acids"},{"key":"2148_CR46","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1016\/j.jtbi.2014.05.016","volume":"358","author":"U Gunasinghe","year":"2014","unstructured":"Gunasinghe U, Alahakoon D, Bedingfield S. Extraction of high quality k-words for alignment-free sequence comparison. J Theor Biol. 2014;358:31\u201351.","journal-title":"J Theor Biol"},{"issue":"14","key":"2148_CR47","doi-asserted-by":"publisher","first-page":"1991","DOI":"10.1093\/bioinformatics\/btu177","volume":"30","author":"C-A Leimeister","year":"2014","unstructured":"Leimeister C-A, Boden M, Horwege S, Lindner S, Morgenstern B. Fast alignment-free sequence comparison using spaced-word frequencies. Bioinformatics. 2014;30(14):1991\u20139.","journal-title":"Bioinformatics"},{"key":"2148_CR48","doi-asserted-by":"publisher","first-page":"23262","DOI":"10.1074\/jbc.M401932200","volume":"279","author":"M Bhasin","year":"2004","unstructured":"Bhasin M, Raghava GPS. Classification of nuclear receptors based on amino acid composition and dipeptide composition. J Bio Chem. 2004;279:23262.","journal-title":"J Bio Chem"},{"issue":"2","key":"2148_CR49","doi-asserted-by":"crossref","first-page":"121","DOI":"10.3233\/ISB-00350","volume":"8","author":"M Kumar","year":"2008","unstructured":"Kumar M, Thakur V, Raghava GP. COPid: composition based protein identification. In Silico Biol. 2008;8(2):121\u20138.","journal-title":"In Silico Biol"},{"key":"2148_CR50","doi-asserted-by":"publisher","first-page":"246","DOI":"10.1002\/prot.1035","volume":"43","author":"K-C Chou","year":"2001","unstructured":"Chou K-C. Prediction of protein cellular attributes using Pseudo-amino acid composition. Proteins Struct Funct Genet. 2001;43:246\u201355.","journal-title":"Proteins Struct Funct Genet"},{"issue":"1","key":"2148_CR51","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1002\/ajpa.20250","volume":"129","author":"RR Sokal","year":"2006","unstructured":"Sokal RR, Thomson BA. Population structure inferred by local spatial autocorrelation: an example from an Amerindian tribal population. Am J Phys Anthropol. 2006 Jan;129(1):121\u201331.","journal-title":"Am J Phys Anthropol"},{"issue":"3","key":"2148_CR52","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1002\/bip.360270308","volume":"27","author":"DS Horne","year":"1988","unstructured":"Horne DS. Prediction of protein helix content from an autocorrelation analysis of sequence hydrophobicities. Biopolymers. 1988 Mar;27(3):451\u201377.","journal-title":"Biopolymers"},{"issue":"7","key":"2148_CR53","doi-asserted-by":"publisher","first-page":"960","DOI":"10.1093\/bioinformatics\/btt072","volume":"29","author":"D-S Cao","year":"2013","unstructured":"Cao D-S, Xu Q-S, Liang Y-Z. Propy: a tool to generate various modes of Chou\u2019s PseAAC. Bioinformatics. 2013;29(7):960\u20132.","journal-title":"Bioinformatics"},{"key":"2148_CR54","doi-asserted-by":"publisher","first-page":"8700","DOI":"10.1073\/pnas.92.19.8700","volume":"92","author":"I Dubchak","year":"1995","unstructured":"Dubchak I, Muchnik I, Holbrook SR, Kim SH. Prediction of protein folding class using global description of amino acid sequence. Proc Natl Acad Sci U S A. 1995;92:8700\u20134.","journal-title":"Proc Natl Acad Sci U S A"},{"issue":"4","key":"2148_CR55","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1002\/(SICI)1097-0134(19990601)35:4<401::AID-PROT3>3.0.CO;2-K","volume":"35","author":"I Dubchak","year":"1999","unstructured":"Dubchak I, Muchnik I, Mayor C, Dralyuk I, Kim SH. Recognition of a protein fold in the context of the SCOP classification. Proteins Struct Funct Bioinf. 1999;35(4):401\u20137.","journal-title":"Proteins Struct Funct Bioinf"},{"issue":"2","key":"2148_CR56","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1006\/bbrc.2000.3815","volume":"278","author":"K-C Chou","year":"2000","unstructured":"Chou K-C. Prediction of protein subcellular locations by incorporating quasi-sequence-order effect. Biochem Biophys Res Commun. 2000;278(2):477\u201383.","journal-title":"Biochem Biophys Res Commun"},{"key":"2148_CR57","volume-title":"The First IEEE International Workshop on Classification Problems Embedded in the Nature of Big Data (CPBD): edited by Trustcom\/BigDataSE\/ISPA I. Trustcom-BigDataSE-ISPA 2015","author":"R Sd","year":"2015","unstructured":"Sd R, Ben\u00edtez JM, Herrera F. Analysis of data preprocessing increasing the oversampling ratio for extremely imbalanced big data classification. In: The First IEEE International Workshop on Classification Problems Embedded in the Nature of Big Data (CPBD): edited by Trustcom\/BigDataSE\/ISPA I. Trustcom-BigDataSE-ISPA 2015; 2015."},{"key":"2148_CR58","unstructured":"Deza E. Dictionary of Distances. Amsterdam: Elsevier; 2006."},{"issue":"2","key":"2148_CR59","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/s40747-017-0037-9","volume":"3","author":"A Fern\u00e1ndez","year":"2017","unstructured":"Fern\u00e1ndez A, Sd R, Chawla NV, Herrera F. An insight into imbalanced big data classification: outcomes and challenges. Complex Intelligent Syst. 2017;3(2):105\u201320.","journal-title":"Complex Intelligent Syst"},{"issue":"1","key":"2148_CR60","first-page":"1235","volume":"17","author":"X Meng","year":"2016","unstructured":"Meng X, Bradley J, Yavuz B, Sparks E, Venkataraman S, Liu D, Freeman J, Tsai D, Amde M, Owen S. Mllib: machine learning in apache spark. J Machine Learning Res. 2016;17(1):1235\u201341.","journal-title":"J Machine Learning Res"},{"issue":"16","key":"2148_CR61","doi-asserted-by":"publisher","first-page":"2044","DOI":"10.1093\/bioinformatics\/btl286","volume":"22","author":"TF DeLuca","year":"2006","unstructured":"DeLuca TF, Wu I-H, Pu J, Monaghan T, Peshkin L, Singh S, Wall DP. Roundup: a multi-genome repository of orthologs and evolutionary distance. Bioinformatics. 2006;22(16):2044\u20136.","journal-title":"Bioinformatics"},{"key":"2148_CR62","unstructured":"Witten IH, Frank E, Hall MA, Pal CJ: Data Mining: Practical machine learning tools and techniques: Morgan Kaufmann; 2016."},{"key":"2148_CR63","doi-asserted-by":"crossref","first-page":"3.5.1","DOI":"10.1002\/0471250953.bi0305s43","volume":"43","author":"WR Pearson","year":"2013","unstructured":"Pearson WR. Selecting the right similarity-scoring Matrix. Curr Protoc Bioinformatics. 2013;43:3.5.1\u20139.","journal-title":"Curr Protoc Bioinformatics"},{"key":"2148_CR64","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1016\/j.inffus.2017.10.001","volume":"42","author":"S Ram\u00edrez-Gallego","year":"2018","unstructured":"Ram\u00edrez-Gallego S, Fern\u00e1ndez A, Garc\u00eda S, Chen M, Herrera F. Big data: tutorial and guidelines on information and process fusion for analytics algorithms with MapReduce. Information Fusion. 2018;42:51\u201361.","journal-title":"Information Fusion"},{"issue":"3","key":"2148_CR65","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1093\/bioinformatics\/btm585","volume":"24","author":"GM Hagelsieb","year":"2008","unstructured":"Hagelsieb GM, Latimer K. Choosing BLAST options for better detection of orthologs as reciprocal best hits. Bioinformatics. 2008;24(3):319\u201324.","journal-title":"Bioinformatics"},{"issue":"5","key":"2148_CR66","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1093\/bib\/bbr030","volume":"12","author":"DM Kristensen","year":"2011","unstructured":"Kristensen DM, Wolf YI, Mushegian AR, Koonin EV. Computational methods for gene Orthology inference. Brief Bioinform. 2011;12(5):379\u201391.","journal-title":"Brief Bioinform"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-018-2148-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s12859-018-2148-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-018-2148-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T03:19:53Z","timestamp":1751599193000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/s12859-018-2148-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,5,3]]},"references-count":66,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["2148"],"URL":"https:\/\/doi.org\/10.1186\/s12859-018-2148-8","relation":{},"ISSN":["1471-2105"],"issn-type":[{"type":"electronic","value":"1471-2105"}],"subject":[],"published":{"date-parts":[[2018,5,3]]},"assertion":[{"value":"13 February 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 April 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 May 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Not applicable.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Publisher\u2019s Note"}}],"article-number":"166"}}