{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T19:51:55Z","timestamp":1760385115230},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2010,12,14]],"date-time":"2010-12-14T00:00:00Z","timestamp":1292284800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2011,8]]},"DOI":"10.1007\/s10115-010-0367-z","type":"journal-article","created":{"date-parts":[[2010,12,15]],"date-time":"2010-12-15T11:50:00Z","timestamp":1292413800000},"page":"365-393","source":"Crossref","is-referenced-by-count":28,"title":["Statistical semantics for enhancing document clustering"],"prefix":"10.1007","volume":"28","author":[{"given":"Ahmed K.","family":"Farahat","sequence":"first","affiliation":[]},{"given":"Mohamed S.","family":"Kamel","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2010,12,14]]},"reference":[{"issue":"12","key":"367_CR1","doi-asserted-by":"crossref","first-page":"1624","DOI":"10.1109\/TKDE.2005.198","volume":"17","author":"D Cai","year":"2005","unstructured":"Cai D, He X, Han J (2005) Document clustering using locality preserving indexing. IEEE Trans Knowl Data Eng 17(12): 1624\u20131637","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"367_CR2","unstructured":"Carbonell J, Yang Y, Frederking R, Brown R, Geng Y, Lee D (1997) Translingual information retrieval: A comparative evaluation. In: Proceedings of the fifteenth international joint conference on artificial intelligence. Morgan Kaufmann, San Mateo, pp 708\u2013715"},{"issue":"2","key":"367_CR3","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1023\/A:1013625426931","volume":"18","author":"N Cristianini","year":"2002","unstructured":"Cristianini N, Shawe-Taylor J, Lodhi H (2002) Latent semantic kernels. J Intell Inf Syst 18(2): 127\u2013152","journal-title":"J Intell Inf Syst"},{"issue":"6","key":"367_CR4","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9","volume":"41","author":"S Deerwester","year":"1990","unstructured":"Deerwester S, Dumais S, Furnas G, Landauer T, Harshman R (1990) Indexing by latent semantic analysis. J Am Soc Inf Sci Technol 41(6): 391\u2013407","journal-title":"J Am Soc Inf Sci Technol"},{"key":"367_CR5","doi-asserted-by":"crossref","unstructured":"Dhillon I (2001) Co-clustering documents and words using bipartite spectral graph partitioning. In: Proceedings of the seventh ACM SIGKDD international conference on knowledge discovery and data mining. ACM, New York, pp 269\u2013274","DOI":"10.1145\/502512.502550"},{"key":"367_CR6","first-page":"73","volume-title":"Survey of Text Mining","author":"I Dhillon","year":"2003","unstructured":"Dhillon I, Kogan J, Nicholas C (2003) Feature selection and document clustering. In: Berry M (eds) Survey of Text Mining. Springer, New York, pp 73\u2013100"},{"issue":"1\/2","key":"367_CR7","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1023\/A:1007612920971","volume":"42","author":"IS Dhillon","year":"2001","unstructured":"Dhillon IS, Modha DS (2001) Concept decompositions for large sparse text data using clustering. Mach Learn 42(1\/2): 143\u2013175","journal-title":"Mach Learn"},{"key":"367_CR8","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1109\/TPAMI.2008.277","volume":"32","author":"C Ding","year":"2010","unstructured":"Ding C, Li T, Jordan MI (2010) Convex and semi-nonnegative matrix factorizations. IEEE Trans Pattern Anal Mach Intell 32: 45\u201355","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"367_CR9","unstructured":"Dongen S (2000) Performance criteria for graph clustering and Markov cluster experiments. Technical report, CWI (Centre for Mathematics and Computer Science), Amsterdam, The Netherlands"},{"issue":"1","key":"367_CR10","doi-asserted-by":"crossref","first-page":"132","DOI":"10.1137\/S0097539704442684","volume":"36","author":"P Drineas","year":"2007","unstructured":"Drineas P, Kannan R, Mahoney M (2007) Fast Monte Carlo algorithms for matrices I: approximating matrix multiplication. SIAM J Comput 36(1): 132\u2013157","journal-title":"SIAM J Comput"},{"key":"367_CR11","unstructured":"Farahat AK, Kamel MS (2009) Document clustering using semantic kernels based on term\u2013term correlations. In: Proceedings of the 2009 IEEE international conference on data mining workshops. IEEE Computer Society, Washington, DC, pp 459\u2013464"},{"key":"367_CR12","unstructured":"Farahat AK, Kamel MS (2010) Enhancing document clustering using hybrid models for semantic similarity. In: Proceedings of the eighth workshop on text mining at the tenth SIAM international conference on data mining. SIAM, Philadelphia, pp 83\u201392"},{"key":"367_CR13","doi-asserted-by":"crossref","unstructured":"Fung B, Wang K, Ester M (2003) Hierarchical document clustering using frequent itemsets. In: Proceedings of the third SIAM international conference on data mining. SIAM, Philadelphia, pp 59\u201370","DOI":"10.1137\/1.9781611972733.6"},{"issue":"6","key":"367_CR14","doi-asserted-by":"crossref","first-page":"1753","DOI":"10.1002\/j.1538-7305.1983.tb03513.x","volume":"62","author":"G Furnas","year":"1983","unstructured":"Furnas G, Landauer T, Gomez L, Dumais S (1983) Statistical semantics: analysis of the potential performance of keyword information systems. Bell Syst Tech J 62(6): 1753\u20131806","journal-title":"Bell Syst Tech J"},{"key":"367_CR15","unstructured":"Gabrilovich E, Markovitch S (2007) Computing semantic relatedness using Wikipedia-based explicit semantic analysis. In: Proceedings of the twentieth international joint conference on artificial intelligence. Morgan Kaufmann, San Mateo, pp 6\u201312"},{"key":"367_CR16","doi-asserted-by":"crossref","unstructured":"Han E, Boley D, Gini M, Gross R, Hastings K, Karypis G, Kumar V, Mobasher B, Moore J (1998) WebACE: a web agent for document categorization and exploration. In: Proceedings of the second international conference on autonomous agents. ACM, New York, pp 408\u2013415","DOI":"10.1145\/280765.280872"},{"issue":"1","key":"367_CR17","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1016\/S0167-9473(02)00070-1","volume":"41","author":"X He","year":"2002","unstructured":"He X, Zha H, Ding C, Simon H (2002) Web document clustering using hyperlink structures. Comput Stat Data Anal 41(1): 19\u201345","journal-title":"Comput Stat Data Anal"},{"key":"367_CR18","unstructured":"Hotho A, Staab S, Stumme G (2003) WordNet improves text document clustering. In: Proceedings of the SIGIR 2003 semantic web workshop. ACM, New York, pp 541\u2013544"},{"key":"367_CR19","doi-asserted-by":"crossref","unstructured":"Hu X, Zhang X, Lu C, Park EK, Zhou X (2009) Exploiting wikipedia as external knowledge for document clustering. In: Proceedings of the fifteenth ACM SIGKDD international conference on knowledge discovery and data mining. ACM, New York, pp 389\u2013396","DOI":"10.1145\/1557019.1557066"},{"key":"367_CR20","doi-asserted-by":"crossref","unstructured":"Huang A, Milne D, Frank E, Witten I (2009) Clustering documents using a Wikipedia-based concept representation. In: Proceedings of the thirteenth Pacific-Asia conference on advances in knowledge discovery and data mining. Springer, Berlin, pp 628\u2013636","DOI":"10.1007\/978-3-642-01307-2_62"},{"issue":"3","key":"367_CR21","doi-asserted-by":"crossref","first-page":"264","DOI":"10.1145\/331499.331504","volume":"31","author":"AK Jain","year":"1999","unstructured":"Jain AK, Murty MN, Flynn PJ (1999) Data clustering: a review. ACM Comput Surv 31(3): 264\u2013323","journal-title":"ACM Comput Surv"},{"key":"367_CR22","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1007\/s10115-009-0256-5","volume":"25","author":"L Jing","year":"2010","unstructured":"Jing L, Ng M, Huang J (2010) Knowledge-based vector space model for text clustering. Knowl Inf Syst 25: 35\u201355","journal-title":"Knowl Inf Syst"},{"key":"367_CR23","volume-title":"Principal component analysis","author":"I Jolliffe","year":"2002","unstructured":"Jolliffe I (2002) Principal component analysis. Springer, New York"},{"key":"367_CR24","doi-asserted-by":"crossref","unstructured":"Karypis G (2003) CLUTO\u2014a clustering toolkit. Technical Report #02-017, University of Minnesota, Department of Computer Science, Minnesota, MN, USA","DOI":"10.21236\/ADA439508"},{"key":"367_CR25","doi-asserted-by":"crossref","first-page":"788","DOI":"10.1038\/44565","volume":"401","author":"D Lee","year":"1999","unstructured":"Lee D, Seung H (1999) Learning the parts of objects by non-negative matrix factorization. Nature 401: 788\u2013791","journal-title":"Nature"},{"key":"367_CR26","unstructured":"Lewis D (1999) Reuters-21578 text categorization test collection distribution 1.0"},{"key":"367_CR27","doi-asserted-by":"crossref","unstructured":"Meila M (2003) Comparing clusterings by the variation of information. In: Learning theory and Kernel Machines. Springer, Berlin, pp 173\u2013187","DOI":"10.1007\/978-3-540-45167-9_14"},{"issue":"11","key":"367_CR28","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1145\/219717.219748","volume":"38","author":"GA Miller","year":"1995","unstructured":"Miller GA (1995) WordNet: a lexical database for English. Commun ACM 38(11): 39\u201341","journal-title":"Commun ACM"},{"issue":"2","key":"367_CR29","doi-asserted-by":"crossref","first-page":"180","DOI":"10.1016\/j.ipm.2009.09.007","volume":"46","author":"J-F Pessiot","year":"2010","unstructured":"Pessiot J-F, Kim Y-M, Amini MR, Gallinari P (2010) Improving document clustering in a learned concept space. Inf Process Manage 46(2): 180\u2013192","journal-title":"Inf Process Manage"},{"issue":"11","key":"367_CR30","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1145\/361219.361220","volume":"18","author":"G Salton","year":"1975","unstructured":"Salton G, Wong A, Yang CS (1975) A vector space model for automatic indexing. Commun ACM 18(11): 613\u2013620","journal-title":"Commun ACM"},{"key":"367_CR31","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1007\/BFb0020217","volume":"1327","author":"B Scholkopf","year":"1997","unstructured":"Scholkopf B, Smola A, Muller K (1997) Kernel principal component analysis. Lect Notes Comput Sci 1327: 583\u2013588","journal-title":"Lect Notes Comput Sci"},{"key":"367_CR32","doi-asserted-by":"crossref","unstructured":"Sch\u00fctze H, Silverstein C (1997) Projections for efficient document clustering. In: Proceedings of the twentieth annual international ACM SIGIR conference on research and development in information retrieval, SIGIR \u201997. ACM, New York, pp 74\u201381","DOI":"10.1145\/258525.258539"},{"key":"367_CR33","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511809682","volume-title":"Kernel methods for pattern analysis","author":"J Shawe-Taylor","year":"2004","unstructured":"Shawe-Taylor J, Cristianini N (2004) Kernel methods for pattern analysis. Cambridge University Press, Cambridge"},{"key":"367_CR34","doi-asserted-by":"crossref","unstructured":"Slonim N, Tishby N (2000) Document clustering using word clusters via the information bottleneck method. In: Proceedings of the twenty-third annual international ACM SIGIR conference on research and development in information retrieval. ACM, New York, pp 208\u2013215","DOI":"10.1145\/345508.345578"},{"issue":"4","key":"367_CR35","doi-asserted-by":"crossref","first-page":"395","DOI":"10.1007\/s11222-007-9033-z","volume":"17","author":"U Luxburg von","year":"2007","unstructured":"von Luxburg U (2007) A tutorial on spectral clustering. Stat Comput 17(4): 395\u2013416","journal-title":"Stat Comput"},{"issue":"3","key":"367_CR36","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1007\/s10115-008-0152-4","volume":"19","author":"P Wang","year":"2009","unstructured":"Wang P, Hu J, Zeng H, Chen Z (2009) Using wikipedia knowledge to improve text classification. Knowl Inf Syst 19(3): 265\u2013281","journal-title":"Knowl Inf Syst"},{"key":"367_CR37","doi-asserted-by":"crossref","unstructured":"Wong SKM, Ziarko W, Wong PCN (1985) Generalized vector spaces model in information retrieval. In: Proceedings of the eighth annual international ACM SIGIR conference on research and development in information retrieval. ACM, New York, pp 18\u201325","DOI":"10.1145\/253495.253506"},{"key":"367_CR38","doi-asserted-by":"crossref","unstructured":"Wu J, Xiong H, Chen J (2009) Adapting the right measures for k-means clustering. In: Proceedings of the fifteenth ACM SIGKDD international conference on knowledge discovery and data mining. ACM, New York, pp 877\u2013886","DOI":"10.1145\/1557019.1557115"},{"key":"367_CR39","doi-asserted-by":"crossref","unstructured":"Xu W, Liu X, Gong Y (2003) Document clustering based on non-negative matrix factorization. In: Proceedings of the twenty-sixth annual international ACM SIGIR conference on research and development in information retrieval. ACM, New York, pp 267\u2013273","DOI":"10.1145\/860435.860485"},{"issue":"3","key":"367_CR40","doi-asserted-by":"crossref","first-page":"311","DOI":"10.1023\/B:MACH.0000027785.44527.d6","volume":"55","author":"Y Zhao","year":"2004","unstructured":"Zhao Y, Karypis G (2004) Empirical and theoretical comparisons of selected criterion functions for document clustering. Mach Learn 55(3): 311\u2013331","journal-title":"Mach Learn"},{"issue":"2","key":"367_CR41","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1007\/s10618-005-0361-3","volume":"10","author":"Y Zhao","year":"2005","unstructured":"Zhao Y, Karypis G (2005) Hierarchical clustering algorithms for document datasets. Data Min Knowl Discov 10(2): 141\u2013168","journal-title":"Data Min Knowl Discov"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-010-0367-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10115-010-0367-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-010-0367-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,6]],"date-time":"2019-06-06T23:19:17Z","timestamp":1559863157000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10115-010-0367-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,12,14]]},"references-count":41,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2011,8]]}},"alternative-id":["367"],"URL":"https:\/\/doi.org\/10.1007\/s10115-010-0367-z","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"value":"0219-1377","type":"print"},{"value":"0219-3116","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,12,14]]}}}