{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T18:21:51Z","timestamp":1770747711364,"version":"3.49.0"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2014,12,9]],"date-time":"2014-12-09T00:00:00Z","timestamp":1418083200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2015,11]]},"DOI":"10.1007\/s10618-014-0397-3","type":"journal-article","created":{"date-parts":[[2014,12,8]],"date-time":"2014-12-08T09:59:00Z","timestamp":1418032740000},"page":"1838-1864","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Size matters: choosing the most informative set of window lengths for mining patterns in event sequences"],"prefix":"10.1007","volume":"29","author":[{"given":"Jefrey","family":"Lijffijt","sequence":"first","affiliation":[]},{"given":"Panagiotis","family":"Papapetrou","sequence":"additional","affiliation":[]},{"given":"Kai","family":"Puolam\u00e4ki","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,12,9]]},"reference":[{"key":"397_CR1","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1007\/s10994-009-5103-0","volume":"75","author":"D Aloise","year":"2009","unstructured":"Aloise D, Deshpande A, Hansen P, Popat P (2009) NP-hardness of Euclidean sum-of-squares clustering. Mach Learn 75:245\u2013248","journal-title":"Mach Learn"},{"issue":"11","key":"397_CR2","doi-asserted-by":"crossref","first-page":"e7678","DOI":"10.1371\/journal.pone.0007678","volume":"4","author":"EG Altmann","year":"2009","unstructured":"Altmann EG, Pierrehumbert JB, Motter AE (2009) Beyond word frequency: bursts, lulls, and scaling in the temporal distributions of words. PLoS ONE 4(11):e7678","journal-title":"PLoS ONE"},{"key":"397_CR3","unstructured":"Arthur D, Vassilvitskii S (2007) k-means++: the advantages of careful seeding. In: Proceedings of SODA"},{"issue":"2","key":"397_CR4","doi-asserted-by":"crossref","first-page":"573","DOI":"10.1093\/nar\/27.2.573","volume":"27","author":"G Benson","year":"1999","unstructured":"Benson G (1999) Tandem repeats finder: a program to analyze DNA sequences. Nucleic Acids Res 27(2):573\u2013580","journal-title":"Nucleic Acids Res"},{"key":"397_CR5","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511621024","volume-title":"Variation across speech and writing","author":"D Biber","year":"1988","unstructured":"Biber D (1988) Variation across speech and writing. Cambridge University Press, Cambridge"},{"issue":"3","key":"397_CR6","doi-asserted-by":"crossref","first-page":"255","DOI":"10.1046\/j.1469-1809.2000.6430255.x","volume":"64","author":"C Bourgain","year":"2000","unstructured":"Bourgain C, Genin E, Quesneville H, Clerget-Darpoux F (2000) Search for multifactorial disease susceptibility genes in founder populations. Ann Hum Genet 64(3):255\u2013265","journal-title":"Ann Hum Genet"},{"issue":"3","key":"397_CR7","doi-asserted-by":"crossref","first-page":"293","DOI":"10.3233\/IDA-2008-12304","volume":"12","author":"T Calders","year":"2008","unstructured":"Calders T, Dexters N, Goethals B (2008) Mining frequent items in a stream using flexible windows. Intell Data Anal 12(3):293\u2013304","journal-title":"Intell Data Anal"},{"key":"397_CR8","doi-asserted-by":"crossref","unstructured":"Chiu B, Keogh E, Lonardi S (2003) Probabilistic discovery of time series motifs. In: Proceedings of ACM SIGKDD, pp 493\u2013498","DOI":"10.1145\/956750.956808"},{"issue":"Suppl 7","key":"397_CR9","doi-asserted-by":"crossref","first-page":"S21","DOI":"10.1186\/1471-2105-8-S7-S21","volume":"8","author":"MK Das","year":"2007","unstructured":"Das MK, Dai HK (2007) A survey of DNA motif finding algorithms. BMC Bioinform 8(Suppl 7):S21","journal-title":"BMC Bioinform"},{"key":"397_CR10","doi-asserted-by":"crossref","unstructured":"Demaine ED, L\u00f3pez-Ortiz A, Munro JI (2002) Frequency estimation of internet packet streams with limited space. In: Proceedings of ESA, pp 348\u2013360","DOI":"10.1007\/3-540-45749-6_33"},{"key":"397_CR11","unstructured":"Giannella C, Han J, Robertson E, Liu C (2003) Mining frequent itemsets over arbitrary time intervals in data streams. Technical Report TR587, Indiana University"},{"key":"397_CR12","doi-asserted-by":"crossref","unstructured":"Golab L, DeHaan D, Demaine ED, L\u00f3pez-Ortiz A, Munro JI (2003) Identifying frequent items in sliding windows over on-line packet streams. In: Proceedings of IMC, pp 173\u2013178","DOI":"10.1145\/948205.948227"},{"issue":"4","key":"397_CR13","doi-asserted-by":"crossref","first-page":"403","DOI":"10.1075\/ijcl.13.4.02gri","volume":"13","author":"ST Gries","year":"2008","unstructured":"Gries ST (2008) Dispersions and adjusted frequencies in corpora. Int J Corpus Linguist 13(4):403\u2013437","journal-title":"Int J Corpus Linguist"},{"key":"397_CR14","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1007\/s00778-009-0171-0","volume":"19","author":"C Jin","year":"2010","unstructured":"Jin C, Yi K, Chen L, Yu JX, Lin X (2010) Sliding-window top-k queries on uncertain streams. VLDB J 19:411\u2013435","journal-title":"VLDB J"},{"key":"397_CR15","unstructured":"Jin R, Agrawal G (2005) An algorithm for in-core frequent itemset mining on streaming data. In: Proceedings of IEEE ICDM, pp 210\u2013217"},{"issue":"1","key":"397_CR16","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1145\/762471.762473","volume":"28","author":"RM Karp","year":"2003","unstructured":"Karp RM, Shenker S, Papadimitriou CH (2003) A simple algorithm for finding frequent elements in streams and bags. ACM Trans Database Syst 28(1):51\u201355","journal-title":"ACM Trans Database Syst"},{"issue":"1","key":"397_CR17","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1017\/S1351324996001246","volume":"2","author":"SM Katz","year":"1996","unstructured":"Katz SM (1996) Distribution of content words and phrases in text and language modelling. Nat Lang Eng 2(1):15\u201359","journal-title":"Nat Lang Eng"},{"key":"397_CR18","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316801","volume-title":"Finding groups in data: an introduction to cluster analysis","author":"L Kaufman","year":"1990","unstructured":"Kaufman L, Rousseeuw PJ (1990) Finding groups in data: an introduction to cluster analysis. Wiley, New York"},{"issue":"5641","key":"397_CR19","doi-asserted-by":"crossref","first-page":"1898","DOI":"10.1126\/science.1086432","volume":"301","author":"EF Kirkness","year":"2003","unstructured":"Kirkness EF, Bafna V, Halpern AL, Levy S, Remington K, Rusch DB, Delcher AL, Pop M, Wang W, Fraser CM, Venter JC (2003) The dog genome: survey sequencing and comparative analysis. Science 301(5641):1898\u20131903","journal-title":"Science"},{"key":"397_CR20","doi-asserted-by":"crossref","unstructured":"Knobbe A, Blockeel H, Koopman A, Calders T, Obladen B, Bosma C, Galenkamp H, Koenders E, Kok J (2010) Infrawatch: data management of large systems for monitoring infrastructural performance. In: Proceedings of IDA, pp 91\u2013102","DOI":"10.1007\/978-3-642-13062-5_10"},{"issue":"3","key":"397_CR21","first-page":"37","volume":"5","author":"DYW Lee","year":"2001","unstructured":"Lee DYW (2001) Genres, registers, text types, domains and styles: clarifying the concepts and navigating a path through the BNC jungle. Lang Learn Technol 5(3):37\u201372","journal-title":"Lang Learn Technol"},{"key":"397_CR22","unstructured":"Li C, Wang B, Yang X (2007a) VGRAM: improving performance of approximate queries on string collections using variable-length grams. In: Proceedings of VLDB, pp 303\u2013314"},{"issue":"4","key":"397_CR23","doi-asserted-by":"crossref","first-page":"705","DOI":"10.1086\/513205","volume":"80","author":"Y Li","year":"2007","unstructured":"Li Y, Sung WK, Liu JJ (2007b) Association mapping via regularized regression analysis of single-nucleotide-polymorphism haplotypes in variable-sized sliding windows. Am J Hum Genet 80(4):705\u2013715","journal-title":"Am J Hum Genet"},{"key":"397_CR24","doi-asserted-by":"crossref","unstructured":"Li Y, Lin J, Oates T (2012) Visualizing variable-length time series motifs. In: Proceedings of SDM, pp 895\u2013906","DOI":"10.1137\/1.9781611972825.77"},{"key":"397_CR25","doi-asserted-by":"crossref","unstructured":"Lijffijt J, Papapetrou P, Puolam\u00e4ki K, Mannila H (2011) Analyzing word frequencies in large text corpora using inter-arrival times and bootstrapping. In: Proceedings of ECML-PKDD, pp 341\u2013357","DOI":"10.1007\/978-3-642-23783-6_22"},{"key":"397_CR26","doi-asserted-by":"crossref","unstructured":"Lijffijt J, Papapetrou P, Puolam\u00e4ki K (2012) Size matters: finding the most informative set of window lengths. In: Proceedings of ECML-PKDD, pp 451\u2013466","DOI":"10.1007\/978-3-642-33486-3_29"},{"key":"397_CR27","doi-asserted-by":"crossref","unstructured":"Lin CH, Chiu DY, Wu YH, Chen ALP (2005) Mining frequent itemsets from data streams with a time-sensitive sliding window. In: Proceedings of SDM","DOI":"10.1137\/1.9781611972757.7"},{"key":"397_CR28","doi-asserted-by":"crossref","unstructured":"Madsen RE, Kauchak D, Elkan C (2005) Modeling word burstiness using the dirichlet distribution. In: Proceedings of ICML, pp 545\u2013552","DOI":"10.1145\/1102351.1102420"},{"issue":"3","key":"397_CR29","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1023\/A:1009748302351","volume":"1","author":"H Mannila","year":"1997","unstructured":"Mannila H, Toivonen H, Verkamo AI (1997) Discovery of frequent episodes in event sequences. Data Min Knowl Discov 1(3):259\u2013289","journal-title":"Data Min Knowl Discov"},{"key":"397_CR30","doi-asserted-by":"crossref","first-page":"38","DOI":"10.1186\/1471-2156-7-38","volume":"7","author":"RA Mathias","year":"2006","unstructured":"Mathias RA, Gao P, Goldstein JL, Wilson AF, Pugh EW, Furbert-Harris P, Dunston GM, Malveaux FJ, Togias A, Barnes KC, Beaty TH, Huang SK (2006) A graphical assessment of p-values from sliding window haplotype tests of association to identify asthma susceptibility loci on chromosome 11q. BMC Genet 7:38","journal-title":"BMC Genet"},{"key":"397_CR31","doi-asserted-by":"crossref","unstructured":"Mueen A (2013) Enumeration of time series motifs of all lengths. In: Proceedings of ICDM, pp 547\u2013556","DOI":"10.1109\/ICDM.2013.27"},{"key":"397_CR32","doi-asserted-by":"crossref","unstructured":"Mueen A, Keogh EJ, Zhu Q, Cash S, Westover B (2009) Exact discovery of time series motifs. In: Proceedings of SDM, pp 473\u2013484","DOI":"10.1137\/1.9781611972795.41"},{"key":"397_CR33","doi-asserted-by":"crossref","unstructured":"Pakhira MK (2008) Fast image segmentation using modified CLARA algorithm. In: Proceedings of ICIT, pp 14\u201318","DOI":"10.1109\/ICIT.2008.22"},{"key":"397_CR34","doi-asserted-by":"crossref","unstructured":"Papadimitriou S, Yu P (2006) Optimal multi-scale patterns in time series streams. In: Proceedings of ACM SIGMOD, pp 647\u2013658","DOI":"10.1145\/1142473.1142545"},{"key":"397_CR35","doi-asserted-by":"crossref","unstructured":"Papapetrou P, Benson G, Kollios G (2006) Discovering frequent poly-regions in DNA sequences. In: Proceedings of IEEE ICDM workshops, pp 94\u201398","DOI":"10.1109\/ICDMW.2006.63"},{"issue":"4","key":"397_CR36","doi-asserted-by":"crossref","first-page":"406","DOI":"10.1504\/IJDMB.2012.049278","volume":"6","author":"P Papapetrou","year":"2012","unstructured":"Papapetrou P, Benson G, Kollios G (2012) Mining poly-regions in DNA sequences. Int J Data Min Bioinform (IJDMB) 6(4):406\u2013428","journal-title":"Int J Data Min Bioinform (IJDMB)"},{"key":"397_CR37","volume-title":"Bioelectrical signal processing in cardiac and neurological applications","author":"L S\u00f6rnmo","year":"2005","unstructured":"S\u00f6rnmo L, Laguna P (2005) Bioelectrical signal processing in cardiac and neurological applications. Elsevier Academic Press, Amsterdam"},{"issue":"Pt 6","key":"397_CR38","doi-asserted-by":"crossref","first-page":"631","DOI":"10.1111\/j.1469-1809.2009.00543.x","volume":"73","author":"R Tang","year":"2009","unstructured":"Tang R, Feng T, Sha Q, Zhang S (2009) A variable-sized sliding-window approach for genetic association studies via principal component analysis. Ann Hum Genet 73(Pt 6):631\u2013637","journal-title":"Ann Hum Genet"},{"key":"397_CR39","unstructured":"The British National Corpus (2007) Version 3 (BNC XML Edition). Distributed by Oxford University Computing Services on behalf of the BNC Consortium http:\/\/www.natcorp.ox.ac.uk\/"},{"issue":"1","key":"397_CR40","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1086\/302954","volume":"67","author":"H Toivonen","year":"2000","unstructured":"Toivonen H, Onkamo P, Vasko K, Ollikainen V, Sevon P, Mannila H, Herr M, Kere J (2000) Data mining applied to linkage disequilibrium mapping. Am J Hum Genet 67(1):133\u2013145","journal-title":"Am J Hum Genet"},{"key":"397_CR41","doi-asserted-by":"crossref","unstructured":"Vespier U, Knobbe A, Nijssen S, Vanschoren J (2012) MDL-based analysis of time series at multiple time-scales. In: Proceedings of ECML-PKDD, pp 371\u2013386","DOI":"10.1007\/978-3-642-33486-3_24"},{"key":"397_CR42","doi-asserted-by":"crossref","unstructured":"Yingchareonthawornchai S, Sivaraks H, Rakthanmanon T, Ratanamahatana CA (2013) Efficient proper length time series motif discovery. In: Proceedings of ICDM, pp 1265\u20131270","DOI":"10.1109\/ICDM.2013.111"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-014-0397-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10618-014-0397-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-014-0397-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,18]],"date-time":"2019-08-18T03:10:04Z","timestamp":1566097804000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10618-014-0397-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,12,9]]},"references-count":42,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2015,11]]}},"alternative-id":["397"],"URL":"https:\/\/doi.org\/10.1007\/s10618-014-0397-3","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,12,9]]}}}