{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,29]],"date-time":"2025-03-29T04:10:32Z","timestamp":1743221432964,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":25,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299575"},{"type":"electronic","value":"9783642299582"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29958-2_2","type":"book-chapter","created":{"date-parts":[[2012,5,20]],"date-time":"2012-05-20T13:19:58Z","timestamp":1337519998000},"page":"19-34","source":"Crossref","is-referenced-by-count":1,"title":["Clustering Documents with Maximal Substrings"],"prefix":"10.1007","author":[{"given":"Tomonari","family":"Masada","sequence":"first","affiliation":[]},{"given":"Atsuhiro","family":"Takasu","sequence":"additional","affiliation":[]},{"given":"Yuichiro","family":"Shibata","sequence":"additional","affiliation":[]},{"given":"Kiyoshi","family":"Oguri","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"2_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1007\/3-540-45735-6_4","volume-title":"String Processing and Information Retrieval","author":"M. Abouelhoda","year":"2002","unstructured":"Abouelhoda, M., Ohlebusch, E., Kurtz, S.: Optimal Exact String Matching Based on Suffix Arrays. In: Laender, A.H.F., Oliveira, A.L. (eds.) SPIRE 2002. LNCS, vol.\u00a02476, pp. 31\u201343. Springer, Heidelberg (2002)"},{"key":"2_CR2","first-page":"993","volume":"3","author":"D. Blei","year":"2003","unstructured":"Blei, D., Ng, A., Jordan, M.: Latent Dirichlet Allocation. Journal of Machine Learning Research\u00a03, 993\u20131022 (2003)","journal-title":"Journal of Machine Learning Research"},{"doi-asserted-by":"crossref","unstructured":"Chen, X., Hu, X., Shen, X., Rosen, G.: Probabilistic Topic Modeling for Genomic Data Interpretation. In: Park, T., Tsui, S.K.-W., Chen, L., Ng, M.K., Wong, L., Hu, X. (eds.) IEEE International Conference on Bioinformatics and Biomedicine, pp. 18\u201321. IEEE (2010)","key":"2_CR3","DOI":"10.1109\/BIBM.2010.5706554"},{"doi-asserted-by":"crossref","unstructured":"Choi, K.-S., Isahara, H., Kanzaki, K., Kim, H., Pak, S.M., Sun, M.: Word Segmentation Standard in Chinese, Japanese and Korean. In: 7th Workshop on Asian Language Resources, pp. 179\u2013186. Association for Computational Linguistics (2009)","key":"2_CR4","DOI":"10.3115\/1690299.1690325"},{"doi-asserted-by":"crossref","unstructured":"Chumwatana, T., Wong, K., Xie, H.: An Automatic Indexing Technique for Thai Texts Using Frequent Max Substring. In: Imsombut, A. (ed.) Eighth International Symposium on Natural Language Processing, pp. 67\u201372. IEEE (2009)","key":"2_CR5","DOI":"10.1109\/SNLP.2009.5340946"},{"key":"2_CR6","doi-asserted-by":"publisher","first-page":"117","DOI":"10.4236\/jilsa.2010.23015","volume":"2","author":"T. Chumwatana","year":"2010","unstructured":"Chumwatana, T., Wong, K., Xie, H.: A SOM-Based Document Clustering Using Frequent Max Substrings for Non-Segmented Texts. Journal of Intelligent Learning Systems & Applications\u00a02, 117\u2013125 (2010)","journal-title":"Journal of Intelligent Learning Systems & Applications"},{"key":"2_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1007\/3-540-48194-X_17","volume-title":"Combinatorial Pattern Matching","author":"T. Kasai","year":"2001","unstructured":"Kasai, T., Lee, G., Arimura, H., Arikawa, S., Park, K.: Linear-Time Longest-Common-Prefix Computation in Suffix Arrays and Its Applications. In: Amir, A., Landau, G.M. (eds.) CPM 2001. LNCS, vol.\u00a02089, pp. 181\u2013192. Springer, Heidelberg (2001)"},{"unstructured":"Gang, S.: Korean Morphological Analyzer KLT Version 2.10b (2009), http:\/\/nlp.kookmin.ac.kr\/HAM\/kor\/","key":"2_CR8"},{"key":"2_CR9","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1016\/j.datak.2007.08.001","volume":"64","author":"Y. Li","year":"2008","unstructured":"Li, Y., Chung, S.M., Holt, J.D.: Text Document Clustering Based on Frequent Word Meaning Sequences. Data & Knowledge Engineering\u00a064, 381\u2013404 (2008)","journal-title":"Data & Knowledge Engineering"},{"doi-asserted-by":"crossref","unstructured":"Madsen, R., Kauchak, D., Elkan, C.: Modeling Word Burstiness Using the Dirichlet Distribution. In: Raedt, L.D., Wrobel, S. (eds.) 22nd International Conference on Machine Learning, pp. 545\u2013552. ACM (2005)","key":"2_CR10","DOI":"10.1145\/1102351.1102420"},{"doi-asserted-by":"crossref","unstructured":"Masada, T., Shibata, Y., Oguri, K.: Documents as a Bag of Maximal Substrings: An Unsupervised Feature Extraction for Document Clustering. In: 13th International Conference on Enterprise Information Systems, pp.5\u201313. INSTICC (2011)","key":"2_CR11","DOI":"10.5220\/0003403300050013"},{"unstructured":"Minka, T.: Estimating a Dirichlet Distribution (2000), http:\/\/research.microsoft.com\/en-us\/um\/people\/minka\/papers\/dirichlet\/","key":"2_CR12"},{"doi-asserted-by":"crossref","unstructured":"Mochihashi, D., Yamada, T., Ueda, N.: Bayesian Unsupervised Word Segmentation with Nested Pitman-Yor Language Modeling. In: Joint Conference of the 47th Annual Meeting of the Association for Computational Linguistics and the Fourth International Joint Conference on Natural Language Processing of the Asian Federation of Natural Language Processing, pp. 100\u2013108. Association for Computational Linguistics (2009)","key":"2_CR13","DOI":"10.3115\/1687878.1687894"},{"doi-asserted-by":"crossref","unstructured":"Navarro, G., M\u00e4kinen, V.: Compressed Full-Text Indexes. ACM Comput. Surv. 39(1) (2007)","key":"2_CR14","DOI":"10.1145\/1216370.1216372"},{"issue":"2\/3","key":"2_CR15","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1023\/A:1007692713085","volume":"39","author":"K. Nigam","year":"2000","unstructured":"Nigam, K., McCallum, A., Thrun, S., Mitchell, T.: Text Classification from Labeled and Unlabeled Documents Using EM. Machine Learning\u00a039(2\/3), 103\u2013134 (2000)","journal-title":"Machine Learning"},{"unstructured":"Nong, G., Zhang, S., Chan, W.H.: Two Efficient Algorithms for Linear Time Suffix Array Construction. IEEE Transactions on Computers 99(PrePrints) (2008)","key":"2_CR16"},{"doi-asserted-by":"crossref","unstructured":"Okanohara, D., Tsujii, J.: Text Categorization with All Substring Features. In: Ninth SIAM International Conference on Data Mining, pp. 838\u2013846. Society for Industrial and Applied Mathematics (2009)","key":"2_CR17","DOI":"10.1137\/1.9781611972795.72"},{"doi-asserted-by":"crossref","unstructured":"Poon, H., Cherry, C., Toutanova, K.: Unsupervised Morphological Segmentation with Log-Linear Models. In: Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics, pp. 209\u2013217. Association for Computational Linguistics (2009)","key":"2_CR18","DOI":"10.3115\/1620754.1620785"},{"doi-asserted-by":"crossref","unstructured":"Sutton, C., McCallum, A.: An Introduction to Conditional Random Fields for Relational Learning. In: Getoor, L., Taskar, B. (eds.) Introduction to Statistical Relational Learning, pp. 93\u2013128. The MIT Press (2007)","key":"2_CR19","DOI":"10.7551\/mitpress\/7432.003.0006"},{"doi-asserted-by":"crossref","unstructured":"Teh, Y.W.: A Hierarchical Bayesian Language Model Based on Pitman-Yor Processes. In: The 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics, pp. 985\u2013992. Association for Computational Linguistics (2006)","key":"2_CR20","DOI":"10.3115\/1220175.1220299"},{"unstructured":"Tseng, H., Chang, P., Andrew, G., Jurafsky, D., Manning, C.: A Conditional Random Field Word Segmenter for SIGHAN Bakeoff 2005. In: Fourth SIGHAN Workshop on Chinese Language Processing, pp. 168\u2013171. Association for Computational Linguistics (2005)","key":"2_CR21"},{"doi-asserted-by":"crossref","unstructured":"Tsuruoka, Y., Tsujii, J., Ananiadou, S.: Stochastic Gradient Descent Training for L1-Regularized Log-Linear Models with Cumulative Penalty. In: Joint Conference of the 47th Annual Meeting of the Association for Computational Linguistics and the fourth International Joint Conference on Natural Language Processing of the Asian Federation of Natural Language Processing, pp. 477\u2013485. Association for Computational Linguistics (2009)","key":"2_CR22","DOI":"10.3115\/1687878.1687946"},{"doi-asserted-by":"crossref","unstructured":"Wang, X., McCallum, A.: Topics over Time: a Non-Markov Continuous-Time Model of Topical Trends. In: 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 424\u2013433. ACM (2006)","key":"2_CR23","DOI":"10.1145\/1150402.1150450"},{"key":"2_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/978-3-540-24655-8_8","volume-title":"Advanced Web Technologies and Applications","author":"D. Zhang","year":"2004","unstructured":"Zhang, D., Dong, Y.: Semantic, Hierarchical, Online Clustering of Web Search Results. In: Yu, J.X., Lin, X., Lu, H., Zhang, Y. (eds.) APWeb 2004. LNCS, vol.\u00a03007, pp. 69\u201378. Springer, Heidelberg (2004)"},{"doi-asserted-by":"crossref","unstructured":"Zhang, D., Lee, W.: Extracting Key-Substring-Group Features for Text Classification. In: 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 474\u2013483. ACM (2006)","key":"2_CR25","DOI":"10.1145\/1150402.1150455"}],"container-title":["Lecture Notes in Business Information Processing","Enterprise Information Systems"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29958-2_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T10:21:07Z","timestamp":1743157267000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29958-2_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299575","9783642299582"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29958-2_2","relation":{},"ISSN":["1865-1348","1865-1356"],"issn-type":[{"type":"print","value":"1865-1348"},{"type":"electronic","value":"1865-1356"}],"subject":[],"published":{"date-parts":[[2012]]}}}