{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T16:50:37Z","timestamp":1776099037252,"version":"3.50.1"},"reference-count":74,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,1,4]],"date-time":"2022-01-04T00:00:00Z","timestamp":1641254400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,4]],"date-time":"2022-01-04T00:00:00Z","timestamp":1641254400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2022,3]]},"DOI":"10.1007\/s10618-021-00813-0","type":"journal-article","created":{"date-parts":[[2022,1,4]],"date-time":"2022-01-04T07:03:04Z","timestamp":1641279784000},"page":"668-708","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["Sequence graph transform (SGT): a feature embedding function for sequence data mining"],"prefix":"10.1007","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1146-9325","authenticated-orcid":false,"given":"Chitta","family":"Ranjan","sequence":"first","affiliation":[]},{"given":"Samaneh","family":"Ebrahimi","sequence":"additional","affiliation":[]},{"given":"Kamran","family":"Paynabar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,1,4]]},"reference":[{"key":"813_CR1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-07821-2","volume-title":"Frequent pattern mining","author":"CC Aggarwal","year":"2014","unstructured":"Aggarwal CC, Han J (2014) Frequent pattern mining. Springer, Berlin"},{"issue":"17","key":"813_CR2","doi-asserted-by":"publisher","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","volume":"25","author":"SF Altschul","year":"1997","unstructured":"Altschul SF, Madden TL, Sch\u00e4ffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ (1997) Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res 25(17):3389\u20133402","journal-title":"Nucleic Acids Res"},{"key":"813_CR3","doi-asserted-by":"crossref","unstructured":"Ayres J, Flannick J, Gehrke J, Yiu T (2002) Sequential pattern mining using a bitmap representation. In: Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining. ACM, pp 429\u2013435","DOI":"10.1145\/775047.775109"},{"issue":"9","key":"813_CR4","doi-asserted-by":"publisher","first-page":"2522","DOI":"10.1109\/TKDE.2015.2416723","volume":"27","author":"A Bagnall","year":"2015","unstructured":"Bagnall A, Lines J, Hills J, Bostrom A (2015) Time-series classification with COTE: the collective of transformation-based ensembles. IEEE Trans Knowle Data Eng 27(9):2522\u20132535","journal-title":"IEEE Trans Knowle Data Eng"},{"issue":"3","key":"813_CR5","doi-asserted-by":"publisher","first-page":"606","DOI":"10.1007\/s10618-016-0483-9","volume":"31","author":"A Bagnall","year":"2017","unstructured":"Bagnall A, Lines J, Bostrom A, Large J, Keogh E (2017) The great time series classification bake off: a review and experimental evaluation of recent algorithmic advances. Data Min Knowl Discov 31(3):606\u2013660","journal-title":"Data Min Knowl Discov"},{"key":"813_CR6","unstructured":"Bailey TL, Elkan C et\u00a0al (1994) Fitting a mixture model by expectation maximization to discover motifs in bipolymers"},{"issue":"11","key":"813_CR7","doi-asserted-by":"publisher","first-page":"2796","DOI":"10.1109\/TPAMI.2013.72","volume":"35","author":"MG Baydogan","year":"2013","unstructured":"Baydogan MG, Runger G, Tuv E (2013) A bag-of-features framework to classify time series. IEEE Trans Pattern Anal Mach Intell 35(11):2796\u20132802","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"813_CR8","doi-asserted-by":"crossref","unstructured":"Bostrom A, Bagnall A (2017) Binary shapelet transform for multiclass time series classification. In: Transactions on large-scale data-and knowledge-centered systems XXXII. Springer, pp 24\u201346","DOI":"10.1007\/978-3-662-55608-5_2"},{"issue":"5","key":"813_CR9","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1093\/bioinformatics\/17.5.419","volume":"17","author":"J Buhler","year":"2001","unstructured":"Buhler J (2001) Efficient large-scale sequence comparison by locality-sensitive hashing. Bioinformatics 17(5):419\u2013428","journal-title":"Bioinformatics"},{"issue":"2","key":"813_CR10","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1089\/10665270252935430","volume":"9","author":"J Buhler","year":"2002","unstructured":"Buhler J, Tompa M (2002) Finding motifs using random projections. J Comput Biol 9(2):225\u2013242","journal-title":"J Comput Biol"},{"issue":"4","key":"813_CR11","doi-asserted-by":"publisher","first-page":"399","DOI":"10.1023\/A:1024992613384","volume":"7","author":"I Cadez","year":"2003","unstructured":"Cadez I, Heckerman D, Meek C, Smyth P, White S (2003) Model-based clustering and visualization of navigation patterns on a web site. Data Min Knowl Discov 7(4):399\u2013424","journal-title":"Data Min Knowl Discov"},{"key":"813_CR12","unstructured":"Chiu D-Y, Wu Y-H, Chen AL (2004) An efficient algorithm for mining frequent sequences by a new strategy without support counting. In: Proceedings of 20th international conference on data engineering. IEEE, pp 375\u2013386"},{"issue":"1","key":"813_CR13","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1186\/1748-7188-7-34","volume":"7","author":"M Comin","year":"2012","unstructured":"Comin M, Verzotto D (2012) Alignment-free phylogeny of whole genomes using underlying subwords. Algorithms Mol Biol 7(1):34","journal-title":"Algorithms Mol Biol"},{"key":"813_CR14","unstructured":"Costa F, De\u00a0Grave K (2010) Fast neighborhood subgraph pairwise distance kernel. In: Proceedings of the 26th international conference on machine learning. Omnipress, pp 255\u2013262"},{"key":"813_CR15","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511801389","volume-title":"An introduction to support vector machines and other kernel-based learning methods","author":"N Cristianini","year":"2000","unstructured":"Cristianini N, Shawe-Taylor J et al (2000) An introduction to support vector machines and other kernel-based learning methods. Cambridge University Press, Cambridge"},{"key":"813_CR16","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1016\/j.ins.2013.02.030","volume":"239","author":"H Deng","year":"2013","unstructured":"Deng H, Runger G, Tuv E, Vladimir M (2013) A time series forest for classification and feature extraction. Inf Sci 239:142\u2013153","journal-title":"Inf Sci"},{"key":"813_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.tcs.2012.08.005","volume":"462","author":"G Didier","year":"2012","unstructured":"Didier G, Corel E, Laprevotte I, Grossmann A, Land\u00e9s-Devauchelle C (2012) Variable length local decoding and alignment-free sequence comparison. Theor Comput Sci 462:1\u201311","journal-title":"Theor Comput Sci"},{"issue":"1","key":"813_CR18","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1186\/1471-2105-5-113","volume":"5","author":"RC Edgar","year":"2004","unstructured":"Edgar RC (2004) MUSCLE: a multiple sequence alignment method with reduced time and space complexity. BMC Bioinform 5(1):113","journal-title":"BMC Bioinform"},{"issue":"19","key":"813_CR19","doi-asserted-by":"publisher","first-page":"2460","DOI":"10.1093\/bioinformatics\/btq461","volume":"26","author":"RC Edgar","year":"2010","unstructured":"Edgar RC (2010) Search and clustering orders of magnitude faster than BLAST. Bioinformatics 26(19):2460\u20132461","journal-title":"Bioinformatics"},{"key":"813_CR20","unstructured":"Eskin E, Weston J, Noble WS, Leslie CS (2003) Mismatch string kernels for SVM protein classification. In: Advances in neural information processing systems, pp 1441\u20131448"},{"key":"813_CR21","unstructured":"Farhan M, Tariq J, Zaman A, Shabbir M, Khan IU (2017) Efficient approximation algorithms for strings kernel based sequence classification. In: Advances in neural information processing systems, pp 6938\u20136948"},{"key":"813_CR22","doi-asserted-by":"crossref","unstructured":"Ferreira F, Pacheco A (2005) Simulation of semi-Markov processes and Markov chains ordered in level crossing. In: Next generation internet networks. IEEE, pp 121\u2013128","DOI":"10.1109\/NGI.2005.1431656"},{"issue":"23","key":"813_CR23","doi-asserted-by":"publisher","first-page":"3150","DOI":"10.1093\/bioinformatics\/bts565","volume":"28","author":"L Fu","year":"2012","unstructured":"Fu L, Niu B, Zhu Z, Wu S, Li W (2012) CD-HIT: accelerated for clustering the next-generation sequencing data. Bioinformatics 28(23):3150\u20133152","journal-title":"Bioinformatics"},{"key":"813_CR24","unstructured":"Gamboa JCB (2017) Deep learning for time-series analysis. arXiv preprint arXiv:1701.01887"},{"key":"813_CR25","doi-asserted-by":"publisher","first-page":"136","DOI":"10.3389\/fgene.2017.00136","volume":"8","author":"G Glusman","year":"2017","unstructured":"Glusman G, Mauldin DE, Hood LE, Robinson M (2017) Ultrafast comparison of personal genomes via precomputed genome fingerprints. Front Genet 8:136","journal-title":"Front Genet"},{"key":"813_CR26","unstructured":"Graves A (2013) Generating sequences with recurrent neural networks. arXiv preprint arXiv:1308.0850"},{"key":"813_CR27","unstructured":"Han J, Pei J, Mortazavi-Asl B, Pinto H, Chen Q, Dayal U, Hsu M (2001) Prefixspan: mining sequential patterns efficiently by prefix-projected pattern growth. In: Proceedings of the 17th international conference on data engineering, pp 215\u2013224"},{"key":"813_CR28","unstructured":"Haussler D (1999) Convolution kernels on discrete structures. Technical report, Department of Computer Science, University of California"},{"key":"813_CR29","unstructured":"Helske S, Helske J (2017) Mixture hidden Markov models for sequence data: the seqHMM package in R. arXiv preprint arXiv:1704.00543"},{"issue":"4","key":"813_CR30","doi-asserted-by":"publisher","first-page":"851","DOI":"10.1007\/s10618-013-0322-1","volume":"28","author":"J Hills","year":"2014","unstructured":"Hills J, Lines J, Baranauskas E, Mapp J, Bagnall A (2014) Classification of time series by shapelet transformation. Data Min Knowl Discov 28(4):851\u2013881","journal-title":"Data Min Knowl Discov"},{"key":"813_CR31","doi-asserted-by":"crossref","unstructured":"Indyk P, Motwani R (1998) Approximate nearest neighbors: towards removing the curse of dimensionality. In: Proceedings of the thirtieth annual ACM symposium on theory of computing. ACM, pp 604\u2013613","DOI":"10.1145\/276698.276876"},{"issue":"2","key":"813_CR32","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1007\/s10618-015-0418-x","volume":"30","author":"RJ Kate","year":"2016","unstructured":"Kate RJ (2016) Using dynamic time warping distances as features for improved time series classification. Data Min Knowl Discov 30(2):283\u2013312","journal-title":"Data Min Knowl Discov"},{"issue":"03","key":"813_CR33","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1142\/S021972000500120X","volume":"3","author":"R Kuang","year":"2005","unstructured":"Kuang R, Ie E, Wang K, Wang K, Siddiqi M, Freund Y, Leslie C (2005) Profile-based string kernels for remote homology detection and motif extraction. J Bioinform Comput Biol 3(03):527\u2013550","journal-title":"J Bioinform Comput Biol"},{"key":"813_CR34","unstructured":"Kuksa PP, Huang P-H, Pavlovic V (2009) Scalable algorithms for string kernels with inexact matching. In: Advances in neural information processing systems, pp 881\u2013888"},{"key":"813_CR35","doi-asserted-by":"publisher","DOI":"10.4018\/978-1-61350-056-9","volume-title":"Pattern discovery using sequence data mining: applications and studies","author":"P Kumar","year":"2012","unstructured":"Kumar P, Krishna PR, Raju SB (2012) Pattern discovery using sequence data mining: applications and studies. Information Science Reference, Hershey"},{"issue":"5131","key":"813_CR36","doi-asserted-by":"publisher","first-page":"208","DOI":"10.1126\/science.8211139","volume":"262","author":"CE Lawrence","year":"1993","unstructured":"Lawrence CE, Altschul SF, Boguski MS, Liu JS, Neuwald AF, Wootton JC (1993) Detecting subtle sequence signals: a Gibbs sampling strategy for multiple alignment. Science 262(5131):208\u2013214","journal-title":"Science"},{"key":"813_CR37","doi-asserted-by":"crossref","unstructured":"Leslie C, Eskin E, Noble WS (2001) The spectrum kernel: a string kernel for SVM protein classification. In: Biocomputing 2002. World Scientific, pp 564\u2013575","DOI":"10.1142\/9789812799623_0053"},{"issue":"4","key":"813_CR38","doi-asserted-by":"publisher","first-page":"467","DOI":"10.1093\/bioinformatics\/btg431","volume":"20","author":"CS Leslie","year":"2004","unstructured":"Leslie CS, Eskin E, Cohen A, Weston J, Noble WS (2004) Mismatch string kernels for discriminative protein classification. Bioinformatics 20(4):467\u2013476","journal-title":"Bioinformatics"},{"issue":"5","key":"813_CR39","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1093\/bib\/bbq015","volume":"11","author":"H Li","year":"2010","unstructured":"Li H, Homer N (2010) A survey of sequence alignment algorithms for next-generation sequencing. Brief Bioinform 11(5):473\u2013483","journal-title":"Brief Bioinform"},{"issue":"3","key":"813_CR40","doi-asserted-by":"publisher","first-page":"565","DOI":"10.1007\/s10618-014-0361-2","volume":"29","author":"J Lines","year":"2015","unstructured":"Lines J, Bagnall A (2015) Time series classification with ensembles of elastic distance measures. Data Min Knowl Discov 29(3):565\u2013592","journal-title":"Data Min Knowl Discov"},{"key":"813_CR41","doi-asserted-by":"crossref","unstructured":"Lines J, Taylor S, Bagnall A (2016) HIVE-COTE: the hierarchical vote collective of transformation-based ensembles for time series classification. In: IEEE 16th international conference on data mining (ICDM). IEEE, pp 1041\u20131046","DOI":"10.1109\/ICDM.2016.0133"},{"issue":"5","key":"813_CR42","first-page":"52","volume":"12","author":"J Lines","year":"2018","unstructured":"Lines J, Taylor S, Bagnall A (2018) Time series classification with HIVE-COTE: The hierarchical vote collective of transformation-based ensembles. ACM Trans Knowl Discov Data (TKDD) 12(5):52","journal-title":"ACM Trans Knowl Discov Data (TKDD)"},{"issue":"12","key":"813_CR43","doi-asserted-by":"publisher","first-page":"4412","DOI":"10.1073\/pnas.86.12.4412","volume":"86","author":"DJ Lipman","year":"1989","unstructured":"Lipman DJ, Altschul SF, Kececioglu JD (1989) A tool for multiple sequence alignment. Proc Natl Acad Sci 86(12):4412\u20134415","journal-title":"Proc Natl Acad Sci"},{"key":"813_CR44","doi-asserted-by":"crossref","unstructured":"Liu C, Wang F, Hu J, Xiong H (2015) Temporal phenotyping from longitudinal electronic health records: a graph based framework. In: Proceedings of the 21th ACM SIGKDD international conference on knowledge discovery and data mining. ACM, pp 705\u2013714","DOI":"10.1145\/2783258.2783352"},{"issue":"1","key":"813_CR45","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1109\/TKDE.2015.2468715","volume":"28","author":"C Liu","year":"2016","unstructured":"Liu C, Zhang K, Xiong H, Jiang G, Yang Q (2016) Temporal skeletonization on sequential data: patterns, categorization, and visualization. IEEE Trans Knowl Data Eng 28(1):211\u2013223","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"813_CR46","doi-asserted-by":"crossref","unstructured":"Masseglia F, Cathala F, Poncelet P (1998) The PSP approach for mining sequential patterns. In: Principles of data mining and knowledge discovery, pp 176\u2013184","DOI":"10.1007\/BFb0094818"},{"issue":"3","key":"813_CR47","first-page":"211","volume":"15","author":"B Morgenstern","year":"1999","unstructured":"Morgenstern B (1999) DIALIGN 2: improvement of the segment-to-segment approach to multiple sequence alignment. Bioinformatics (Oxford, England) 15(3):211\u2013218","journal-title":"Bioinformatics (Oxford, England)"},{"key":"813_CR48","doi-asserted-by":"crossref","unstructured":"Neamtu R, Ahsan R, Rundensteiner EA, Sarkozy G, Keogh E, Dau HA, Nguyen C, Lovering C (2018) Generalized dynamic time warping: unleashing the warping power hidden in point-wise distances. In: IEEE 34th international conference on data engineering (ICDE). IEEE, pp 521\u2013532","DOI":"10.1109\/ICDE.2018.00054"},{"issue":"3","key":"813_CR49","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1016\/0022-2836(70)90057-4","volume":"48","author":"SB Needleman","year":"1970","unstructured":"Needleman SB, Wunsch CD (1970) A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48(3):443\u2013453","journal-title":"J Mol Biol"},{"issue":"1","key":"813_CR50","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1006\/jmbi.2000.4042","volume":"302","author":"C Notredame","year":"2000","unstructured":"Notredame C, Higgins DG, Heringa J (2000) T-Coffee: A novel method for fast and accurate multiple sequence alignment. J Mol Biol 302(1):205\u2013217","journal-title":"J Mol Biol"},{"key":"813_CR51","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1016\/0076-6879(90)83007-V","volume":"183","author":"WR Pearson","year":"1990","unstructured":"Pearson WR (1990) Rapid and sensitive sequence comparison with FASTP and FASTA. Methods Enzymol 183:63\u201398","journal-title":"Methods Enzymol"},{"key":"813_CR52","unstructured":"Ranjan C, Paynabar K, Helm JE, Pan J (2015) The impact of estimation: a new method for clustering and trajectory estimation in patient flow modeling. In: Production and operations management"},{"issue":"2","key":"813_CR53","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1038\/nmeth.1818","volume":"9","author":"M Remmert","year":"2012","unstructured":"Remmert M, Biegert A, Hauser A, S\u00f6ding J (2012) HHblits: lightning-fast iterative protein sequence searching by HMM-HMM alignment. Nat Methods 9(2):173\u2013175","journal-title":"Nat Methods"},{"issue":"1","key":"813_CR54","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1186\/1745-6150-1-11","volume":"1","author":"GK Sandve","year":"2006","unstructured":"Sandve GK, Drabl\u00f8s F (2006) A survey of motif discovery methods in an integrated framework. Biol Direct 1(1):11","journal-title":"Biol Direct"},{"issue":"6","key":"813_CR55","doi-asserted-by":"publisher","first-page":"1505","DOI":"10.1007\/s10618-014-0377-7","volume":"29","author":"P Sch\u00e4fer","year":"2015","unstructured":"Sch\u00e4fer P (2015) The BOSS is concerned with time series classification in the presence of noise. Data Min Knowl Discov 29(6):1505\u20131530","journal-title":"Data Min Knowl Discov"},{"key":"813_CR56","first-page":"2615","volume":"10","author":"Q Shi","year":"2009","unstructured":"Shi Q, Petterson J, Dror G, Langford J, Smola A, Vishwanathan S (2009) Hash kernels for structured data. J Mach Learn Res 10:2615\u20132637","journal-title":"J Mach Learn Res"},{"issue":"1","key":"813_CR57","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1145\/2522968.2522981","volume":"46","author":"JA Silva","year":"2013","unstructured":"Silva JA, Faria ER, Barros RC, Hruschka ER, de Carvalho AC, Gama J (2013) Data stream clustering: a survey. ACM Comput Surv (CSUR) 46(1):13","journal-title":"ACM Comput Surv (CSUR)"},{"key":"813_CR58","doi-asserted-by":"crossref","unstructured":"Siyari P, Dilkina B, Dovrolis C (2016) Lexis: An optimization framework for discovering the hierarchical structure of sequential data. In: Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, pp 1185\u20131194","DOI":"10.1145\/2939672.2939741"},{"issue":"4","key":"813_CR59","doi-asserted-by":"publisher","first-page":"482","DOI":"10.1016\/0196-8858(81)90046-4","volume":"2","author":"TF Smith","year":"1981","unstructured":"Smith TF, Waterman MS (1981) Comparison of biosequences. Adv Appl Math 2(4):482\u2013489","journal-title":"Comparison of biosequences. Adv Appl Math"},{"key":"813_CR60","unstructured":"Smola AJ, Vishwanathan S (2003) Fast kernels for string and tree matching. In: Advances in neural information processing systems, pp 585\u2013592"},{"key":"813_CR61","doi-asserted-by":"crossref","unstructured":"Srikant R, Agrawal R (1996) Mining sequential patterns: generalizations and performance improvements. In: Advances in database technology\u2013EDBT\u201996, pp 1\u201317","DOI":"10.1007\/BFb0014140"},{"issue":"6","key":"813_CR62","doi-asserted-by":"publisher","first-page":"625","DOI":"10.1093\/bioinformatics\/13.6.625","volume":"13","author":"J Stoye","year":"1997","unstructured":"Stoye J, Moulton V, Dress AW (1997) DCA: an efficient implementation of the divide-and-conquer approach to simultaneous multiple sequence alignment. Bioinformatics 13(6):625\u2013626","journal-title":"Bioinformatics"},{"key":"813_CR63","unstructured":"Sutskever I, Vinyals O, Le QV (2014) Sequence to sequence learning with neural networks. In: Advances in neural information processing systems, pp 3104\u20133112"},{"issue":"22","key":"813_CR64","doi-asserted-by":"publisher","first-page":"4673","DOI":"10.1093\/nar\/22.22.4673","volume":"22","author":"JD Thompson","year":"1994","unstructured":"Thompson JD, Higgins DG, Gibson TJ (1994a) CLUSTAL W: improving the sensitivity of progressive multiple sequence alignment through sequence weighting, position-specific gap penalties and weight matrix choice. Nucleic Acids Res 22(22):4673\u20134680","journal-title":"Nucleic Acids Res"},{"issue":"1","key":"813_CR65","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1093\/bioinformatics\/10.1.19","volume":"10","author":"JD Thompson","year":"1994","unstructured":"Thompson JD, Higgins DG, Gibson TJ (1994b) Improved sensitivity of profile searches through the use of sequence weights and gap excision. Bioinformatics 10(1):19\u201329","journal-title":"Bioinformatics"},{"key":"813_CR66","doi-asserted-by":"crossref","unstructured":"Wang JT, Zaki MJ, Toivonen HT, Shasha D (2005) Introduction to data mining in bioinformatics. In: Data mining in bioinformatics. Springer, pp 3\u20138","DOI":"10.1007\/1-84628-059-1_1"},{"issue":"4","key":"813_CR67","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1089\/cmb.1994.1.337","volume":"1","author":"L Wang","year":"1994","unstructured":"Wang L, Jiang T (1994) On the complexity of multiple sequence alignment. J Comput Biol 1(4):337\u2013348","journal-title":"J Comput Biol"},{"issue":"7","key":"813_CR68","doi-asserted-by":"publisher","first-page":"1004","DOI":"10.1093\/bioinformatics\/18.7.1004","volume":"18","author":"J-J Wesselink","year":"2002","unstructured":"Wesselink J-J, de la Iglesia B, James SA, Dicks JL, Roberts IN, Rayward-Smith VJ (2002) Determining a unique defining DNA sequence for yeast species using hashing techniques. Bioinformatics 18(7):1004\u20131010","journal-title":"Bioinformatics"},{"key":"813_CR69","doi-asserted-by":"crossref","unstructured":"Wu CH, Apweiler R, Bairoch A, Natale DA, Barker WC, Boeckmann B, Ferro S, Gasteiger E, Huang H, Lopez R et\u00a0al. (2006) The Universal Protein Resource (UniProt): an expanding universe of protein information. Nucleic Acids Res 34 (suppl\\_1): D187\u2013D191","DOI":"10.1093\/nar\/gkj161"},{"key":"813_CR70","doi-asserted-by":"crossref","unstructured":"Wu L, Yen IE-H, Huo S, Zhao L, Xu K, Ma L, Ji S, Aggarwal C (2019) Efficient global string kernel with random features: beyond counting substructures. In: Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery and data mining, pp 520\u2013528","DOI":"10.1145\/3292500.3330923"},{"issue":"1","key":"813_CR71","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1145\/1882471.1882478","volume":"12","author":"Z Xing","year":"2010","unstructured":"Xing Z, Pei J, Keogh E (2010) A brief survey on sequence classification. ACM Sigkdd Explor Newsl 12(1):40\u201348","journal-title":"ACM Sigkdd Explor Newsl"},{"issue":"2","key":"813_CR72","first-page":"195","volume":"2","author":"Z Zainuddin","year":"2008","unstructured":"Zainuddin Z, Kumar M (2008) Radial basic function neural networks in protein sequence classification. Malays J Math Sci 2(2):195\u2013204","journal-title":"Malays J Math Sci"},{"issue":"1","key":"813_CR73","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1023\/A:1007652502315","volume":"42","author":"MJ Zaki","year":"2001","unstructured":"Zaki MJ (2001) SPADE: An efficient algorithm for mining frequent sequences. Mach Learn 42(1):31\u201360","journal-title":"Mach Learn"},{"issue":"01","key":"813_CR74","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1142\/S1469026804001161","volume":"4","author":"NM Zaki","year":"2004","unstructured":"Zaki NM, Deris S, Illias RM (2004) Features extraction for protein homology detection using hidden Markov models combining scores. Int J Comput Intell Appl 4(01):1\u201312","journal-title":"Int J Comput Intell Appl"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-021-00813-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10618-021-00813-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-021-00813-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,11]],"date-time":"2023-02-11T14:31:10Z","timestamp":1676125870000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10618-021-00813-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,4]]},"references-count":74,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,3]]}},"alternative-id":["813"],"URL":"https:\/\/doi.org\/10.1007\/s10618-021-00813-0","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,1,4]]},"assertion":[{"value":"31 October 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 November 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 January 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}