{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,3,31]],"date-time":"2022-03-31T09:43:07Z","timestamp":1648719787651},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2015,3,21]],"date-time":"2015-03-21T00:00:00Z","timestamp":1426896000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2015,9]]},"DOI":"10.1007\/s10618-015-0413-2","type":"journal-article","created":{"date-parts":[[2015,3,20]],"date-time":"2015-03-20T07:49:59Z","timestamp":1426837799000},"page":"1280-1311","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DRESS: dimensionality reduction for efficient sequence search"],"prefix":"10.1007","volume":"29","author":[{"given":"Alexios","family":"Kotsifakos","sequence":"first","affiliation":[]},{"given":"Alexandra","family":"Stefan","sequence":"additional","affiliation":[]},{"given":"Vassilis","family":"Athitsos","sequence":"additional","affiliation":[]},{"given":"Gautam","family":"Das","sequence":"additional","affiliation":[]},{"given":"Panagiotis","family":"Papapetrou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,3,21]]},"reference":[{"key":"413_CR1","doi-asserted-by":"crossref","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","volume":"25","author":"S Altschul","year":"1997","unstructured":"Altschul S, Madden T, Schffer R, Zhang J, Zhang Z, Miller W, Lipman D (1997) Gapped blast and psi-blast: a new generation of protein database search programs. Nucleic Acids Res 25:3389\u20133402","journal-title":"Nucleic Acids Res"},{"issue":"3","key":"413_CR2","doi-asserted-by":"crossref","first-page":"403","DOI":"10.1016\/S0022-2836(05)80360-2","volume":"215","author":"SF Altschul","year":"1990","unstructured":"Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ (1990) Basic local alignment search tool. J Mol Biol 215(3):403\u2013410","journal-title":"J Mol Biol"},{"key":"413_CR3","unstructured":"Arasu A, Ganti V, Kaushik R (2006) Efficient exact set-similarity joins. In: Proceedings of very large database endowment (PVLDB), pp 918\u2013929"},{"issue":"10","key":"413_CR4","doi-asserted-by":"crossref","first-page":"74","DOI":"10.1145\/135239.135243","volume":"35","author":"R Baeza-Yates","year":"1992","unstructured":"Baeza-Yates R, Gonnet GH (1992) A new approach to text searching. Commun ACM 35(10):74\u201382","journal-title":"Commun ACM"},{"key":"413_CR5","unstructured":"Behm A, Vernica R, Alsubaiee S, Ji S, Lu J, Jin L, Lu Y, Li C (2010) UCI Flamingo Package 4.0. http:\/\/flamingo.ics.uci.edu\/releases\/4.0\/"},{"issue":"Web\u2013Server\u2013Issu","key":"413_CR6","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1093\/nar\/gkl157","volume":"34","author":"R Bhadra","year":"2006","unstructured":"Bhadra R, Sandhya S, Abhinandan KR, Chakrabarti S, Sowdhamini R, Srinivasan N (2006) Cascade psi-blast web server: a remote homology search tool for relating protein domains. Nucleic Acids Res 34(Web\u2013Server\u2013Issue):143\u2013146","journal-title":"Nucleic Acids Res"},{"key":"413_CR7","unstructured":"Burrows M, Wheeler DJ (1994) A block-sorting lossless data compression algorithm. Tech. Rep. 124, Systems Research Center, Palo Alto, http:\/\/citeseerx.ist.psu.edu\/viewdoc\/summary?doi=10.1.1.37.6774"},{"issue":"5","key":"413_CR8","doi-asserted-by":"crossref","first-page":"530","DOI":"10.1109\/TPAMI.2003.1195989","volume":"25","author":"G Hjaltason","year":"2003","unstructured":"Hjaltason G, Samet H (2003) Properties of embedding methods for similarity searching in metric spaces. IEEE Trans Pattern Anal Mach Intell (PAMI) 25(5):530\u2013549","journal-title":"IEEE Trans Pattern Anal Mach Intell (PAMI)"},{"key":"413_CR9","first-page":"76","volume":"1","author":"CV Jongeneel","year":"2000","unstructured":"Jongeneel CV (2000) Searching the expressed sequence tag (est) databases: panning for genes. Bioinformatics 1:76\u201392","journal-title":"Bioinformatics"},{"issue":"4","key":"413_CR10","doi-asserted-by":"crossref","first-page":"672","DOI":"10.1101\/gr.1963804","volume":"14","author":"KJ Kalafus","year":"2004","unstructured":"Kalafus KJ, Jackson AR, Milosavljevic A (2004) Pash: efficient genome-scale sequence anchoring by positional hashing. Genome Resour 14(4):672\u2013678","journal-title":"Genome Resour"},{"key":"413_CR11","doi-asserted-by":"crossref","unstructured":"Kent WJ (2002) Resource BLAT-The BLAST-like alignment tool. Genome Res","DOI":"10.1101\/gr.229202. Article published online before March 2002"},{"key":"413_CR12","unstructured":"Kim MS, Whang KY, Lee JG, Lee MJ (2005a) n-gram\/2l: a space and time efficient two-level n-gram inverted index structure. In: Proceedings of the 31st international conference on very large data bases, VLDB Endowment, pp 325\u2013336"},{"key":"413_CR13","doi-asserted-by":"crossref","first-page":"4335","DOI":"10.1093\/nar\/gki739","volume":"33","author":"YJ Kim","year":"2005","unstructured":"Kim YJ, Boyd A, Athey BD, Patel JM (2005b) miblast: scalable evaluation of a batch of nucleotide sequence queries with blast. Nucleic Acids Res 33:4335\u20134344","journal-title":"Nucleic Acids Res"},{"key":"413_CR14","doi-asserted-by":"crossref","first-page":"1052","DOI":"10.1093\/bioinformatics\/16.11.1052","volume":"16","author":"I Korf","year":"2000","unstructured":"Korf I, Gish W (2000) Mpblast : improved blast performance with multiplexed queries. Bioinformatics 16:1052\u20131053","journal-title":"Bioinformatics"},{"issue":"3","key":"413_CR15","doi-asserted-by":"crossref","first-page":"R25","DOI":"10.1186\/gb-2009-10-3-r25","volume":"10","author":"B Langmead","year":"2009","unstructured":"Langmead B, Trapnell C, Pop M, Salzberg SL et al (2009) Ultrafast and memory-efficient alignment of short dna sequences to the human genome. Genome Biol 10(3):R25","journal-title":"Genome Biol"},{"key":"413_CR16","unstructured":"Li C, Wang B, Yang X (2007) Vgram: improving performance of approximate queries on string collections using variable-length grams. In: Proceedings of the 33rd international conference on Very large data bases, VLDB Endowment, pp 303\u2013314"},{"key":"413_CR17","doi-asserted-by":"crossref","unstructured":"Li C, Lu J, Lu Y (2008a) Efficient merging and filtering algorithms for approximate string searches. International conference on data engineering (ICDE)","DOI":"10.1109\/ICDE.2008.4497434"},{"issue":"11","key":"413_CR18","doi-asserted-by":"crossref","first-page":"1851","DOI":"10.1101\/gr.078212.108","volume":"18","author":"H Li","year":"2008","unstructured":"Li H, Ruan J, Durbin R (2008b) Mapping short dna sequencing reads and calling variants using mapping quality scores. Genome Res 18(11):1851\u20131858","journal-title":"Genome Res"},{"issue":"5","key":"413_CR19","doi-asserted-by":"crossref","first-page":"713","DOI":"10.1093\/bioinformatics\/btn025","volume":"24","author":"R Li","year":"2008","unstructured":"Li R, Li Y, Kristiansen K, Wang J (2008c) Soap: short oligonucleotide alignment program. Bioinformatics 24(5):713\u2013714","journal-title":"Bioinformatics"},{"issue":"4","key":"413_CR20","first-page":"28","volume":"37","author":"Y Li","year":"2012","unstructured":"Li Y, Patel JM, Terrell A (2012) Wham: a high-throughput sequence alignment method. ACM Trans Database Syst (TODS) 37(4):28","journal-title":"ACM Trans Database Syst (TODS)"},{"key":"413_CR21","unstructured":"Litwin W, Mokadem R, Rigaux P, Schwarz T (2007) Fast ngram-based string search over data encoded using algebraic signatures. In: Proceedings of the very large database endowment (PVLDB), pp 207\u2013218"},{"issue":"9\u201310","key":"413_CR22","doi-asserted-by":"crossref","first-page":"775","DOI":"10.1002\/minf.201300084","volume":"32","author":"B Liu","year":"2013","unstructured":"Liu B, Wang X, Zou Q, Dong Q, Chen Q (2013) Protein remote homology detection by combining chous pseudo amino acid composition and profile-based protein representation. Mol Inf 32(9\u201310):775\u2013782","journal-title":"Mol Inf"},{"key":"413_CR23","doi-asserted-by":"crossref","unstructured":"Meek C, Patel JM, Kasetty S (2003) Oasis: an online and accurate technique for local-alignment searches on biological sequences. In: Proceedings of very large database endowment (PVLDB), vol 29, pp 910\u2013921","DOI":"10.1016\/B978-012722442-8\/50085-9"},{"issue":"3","key":"413_CR24","doi-asserted-by":"crossref","first-page":"443","DOI":"10.1016\/0022-2836(70)90057-4","volume":"48","author":"SB Needleman","year":"1970","unstructured":"Needleman SB, Wunsch CD (1970) A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48(3):443\u2013453","journal-title":"J Mol Biol"},{"issue":"10","key":"413_CR25","doi-asserted-by":"crossref","first-page":"1725","DOI":"10.1101\/gr.194201","volume":"11","author":"Z Ning","year":"2001","unstructured":"Ning Z, Cox AJ, Mullikin JC (2001) SSAHA: A fast search method for large dna databases. Genome Resour 11(10):1725\u20131729","journal-title":"Genome Resour"},{"issue":"1","key":"413_CR26","first-page":"205","volume":"2","author":"P Papapetrou","year":"2009","unstructured":"Papapetrou P, Athitsos V, Kollios G, Gunopulos D (2009) Reference-based alignment in large sequence databases. Proc Very Large Database Endow (PVLDB) 2(1):205\u2013216","journal-title":"Proc Very Large Database Endow (PVLDB)"},{"issue":"1","key":"413_CR27","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1016\/0022-2836(81)90087-5","volume":"147","author":"TF Smith","year":"1981","unstructured":"Smith TF, Waterman MS (1981) Identification of common molecular subsequences. J Mol Biol 147(1):195\u2013197","journal-title":"J Mol Biol"},{"issue":"2","key":"413_CR28","doi-asserted-by":"crossref","first-page":"232","DOI":"10.1093\/bioinformatics\/btl571","volume":"23","author":"Y Tian","year":"2007","unstructured":"Tian Y, Mceachin RC, Santos C, States DJ, Patel JM (2007) Saga: A subgraph matching tool for biological graphs. Bioinformatics 23(2):232\u2013239","journal-title":"Bioinformatics"},{"key":"413_CR29","doi-asserted-by":"crossref","unstructured":"Traina C, Traina AJM, Seeger B, Faloutsos C (2000) Slim-trees: high performance metric trees minimizing overlap between nodes. International conference on extending database technology (EDBT), pp 51\u201365","DOI":"10.1007\/3-540-46439-5_4"},{"key":"413_CR30","unstructured":"Venkateswaran J, Lachwani D, Kahveci T, Jermaine C (2006) Reference-based indexing of sequence databases. In: International conference on very large databases (VLDB), pp 906\u2013917"},{"issue":"6","key":"413_CR31","doi-asserted-by":"crossref","first-page":"779","DOI":"10.1007\/s00778-012-0270-1","volume":"21","author":"T Vergoulis","year":"2012","unstructured":"Vergoulis T, Dalamagas T, Sacharidis D, Sellis TK (2012) Approximate regional sequence matching for genomic databases. VLDB J 21(6):779\u2013795","journal-title":"VLDB J"},{"key":"413_CR32","unstructured":"Vieira MR, Traina C, Chino FJT, Traina AJM (2004) Dbm-tree: a dynamic metric access method sensitive to local density data. Brazilian symposium on databases (SBBD), pp 163\u2013177"},{"key":"413_CR33","doi-asserted-by":"crossref","unstructured":"Wandelt S, Starlinger J, Bux M, Leser U (2013) Rcsi: scalable similarity search in thousand(s) of genomes. Proceedings of the VLDB Endowment (PVLDB) p (to appear)","DOI":"10.14778\/2536258.2536265"},{"issue":"10","key":"413_CR34","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1145\/135239.135244","volume":"35","author":"S Wu","year":"1992","unstructured":"Wu S, Manber U (1992) Fast text searching: allowing errors. Commun ACM 35(10):83\u201391","journal-title":"Commun ACM"},{"issue":"4","key":"413_CR35","doi-asserted-by":"crossref","first-page":"960","DOI":"10.1145\/1114244.1114248","volume":"30","author":"X Yan","year":"2005","unstructured":"Yan X, Yu PS, Han J (2005) Graph indexing based on discriminative frequent structure analysis. ACM Trans Database Syst 30(4):960\u2013993","journal-title":"ACM Trans Database Syst"},{"key":"413_CR36","doi-asserted-by":"crossref","unstructured":"Yang X, Wang B, Li C (2008) Cost-based variable-length-gram selection for string collections to support approximate queries efficiently. In: Proceedings of the 2008 ACM SIGMOD international conference on Management of data, ACM, pp 353\u2013364","DOI":"10.1145\/1376616.1376655"},{"key":"413_CR37","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1089\/10665270050081478","volume":"7","author":"Z Zhang","year":"2000","unstructured":"Zhang Z, Schwartz S, Wagner L, Miller W (2000) A greedy algorithm for aligning dna sequences. J Comput Biol 7:203\u2013214","journal-title":"J Comput Biol"},{"issue":"11","key":"413_CR38","doi-asserted-by":"crossref","first-page":"1579","DOI":"10.14778\/2350229.2350271","volume":"5","author":"H Zhu","year":"2012","unstructured":"Zhu H, Kollios G, Athitsos V (2012) A generic framework for efficient and effective subsequence retrieval. Proc VLDB Endow (PVLDB) 5(11):1579\u20131590","journal-title":"Proc VLDB Endow (PVLDB)"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-015-0413-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10618-015-0413-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-015-0413-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,22]],"date-time":"2019-08-22T01:17:33Z","timestamp":1566436653000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10618-015-0413-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,3,21]]},"references-count":38,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2015,9]]}},"alternative-id":["413"],"URL":"https:\/\/doi.org\/10.1007\/s10618-015-0413-2","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,3,21]]}}}