{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T01:34:27Z","timestamp":1743039267493,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":19,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540213826"},{"type":"electronic","value":"9783540247524"}],"license":[{"start":{"date-parts":[[2004,1,1]],"date-time":"2004-01-01T00:00:00Z","timestamp":1072915200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2004,1,1]],"date-time":"2004-01-01T00:00:00Z","timestamp":1072915200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2004]]},"DOI":"10.1007\/978-3-540-24752-4_15","type":"book-chapter","created":{"date-parts":[[2010,7,28]],"date-time":"2010-07-28T00:16:46Z","timestamp":1280276206000},"page":"197-208","source":"Crossref","is-referenced-by-count":0,"title":["Eliminating High-Degree Biased Character Bigrams for Dimensionality Reduction in Chinese Text Categorization"],"prefix":"10.1007","author":[{"given":"Dejun","family":"Xue","sequence":"first","affiliation":[]},{"given":"Maosong","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"1","key":"15_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/505282.505283","volume":"34","author":"F. Sebastiani","year":"2002","unstructured":"Sebastiani, F.: Machine Learning in Automated Text Categorization. ACM Computing Surveys\u00a034(1), 1\u201347 (2002)","journal-title":"ACM Computing Surveys"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Yang, Y.: Expert Network: Effective and Efficient Learning from Human Decisions in Text Categorization and Retrieval. In: Proceedings of 17th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 11\u201321 (1994)","DOI":"10.1007\/978-1-4471-2099-5_2"},{"key":"15_CR3","unstructured":"Theeramunkong, T., Lertnattee, V.: Improving Centroid-Based Text Classification Using Term-Distribution-Based Weighting System and Clustering. In: Proceedings of International Symposium on Communications and Information Technology, pp. 33\u201336 (2001)"},{"key":"15_CR4","unstructured":"Joachims, T.: A Probabilistic Analysis of the Rocchio Algorithm with TFIDF for Text Categorization. In: Proceedings of 14th of International Conference on Machine Learning, pp. 143\u2013151 (1997)"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Joachims, T.: Text Categorization with Support Vector Machines: Learnging with Many Relevant Features. In: Proceedings of 10th European Conference on Machine Learning, pp. 137\u2013142 (1998)","DOI":"10.1007\/BFb0026683"},{"key":"15_CR6","volume-title":"Introduction to Modern Information Retrieval","author":"G. Salton","year":"1983","unstructured":"Salton, G., McGill, M.: Introduction to Modern Information Retrieval. McGraw-Hill Book Company, New York (1983)"},{"key":"15_CR7","doi-asserted-by":"crossref","unstructured":"Lewis, D.D.: An Evaluation of Phrasal and Clustered Representations on a Text Categorization. In: Proceedings of 15th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 37\u201350 (1992)","DOI":"10.1145\/133160.133172"},{"key":"15_CR8","unstructured":"Molina, L.C., Belanche, L., Nebot, A.: Feature Selection Algorithms: A Survey and Experimental Evaluation. In: Proceedings of 2nd IEEE International Conference on Data Mining, Maebashi City, Japan, pp. 306\u2013313 (2002)"},{"key":"15_CR9","unstructured":"Yang, Y., Jan Pedersen, O.: A Comparative Study on Feature Selection in Text Categorization. In: Proceedings of 14th International Conference on Machine Learning, pp. 412\u2013420 (1997)"},{"issue":"8","key":"15_CR10","doi-asserted-by":"publisher","first-page":"537","DOI":"10.1093\/comjnl\/41.8.537","volume":"41","author":"Y.H. Li","year":"1998","unstructured":"Li, Y.H., Jain, A.K.: Classification of Text Document. The Computer Journal\u00a041(8), 537\u2013546 (1998)","journal-title":"The Computer Journal"},{"issue":"6","key":"15_CR11","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9","volume":"41","author":"S. Deerwester","year":"1990","unstructured":"Deerwester, S., Dumais, S.T., Furnas, G.W., Landauer, T.K., Harshman, R.: Indexing by Latent Semantic Indexing. Journal of the American Society for Information Science\u00a041(6), 391\u2013407 (1990)","journal-title":"Journal of the American Society for Information Science"},{"key":"15_CR12","doi-asserted-by":"crossref","unstructured":"Schutze, H., Hull, D.A., Jan Pedersen, O.: A comparison of Classifiers and Document Representations for the Routing Problem. In: Proceedings of 18th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 229\u2013237 (1995)","DOI":"10.1145\/215206.215365"},{"issue":"2","key":"15_CR13","first-page":"43","volume":"5","author":"J.-J. Tsay","year":"2000","unstructured":"Tsay, J.-J., Yang, J.-D.: Design and Evaluation of Approaches to Automatic Chinese Text Categorization. Computational Linguistics and Chinese Language Processing\u00a05(2), 43\u201358 (2000)","journal-title":"Computational Linguistics and Chinese Language Processing"},{"key":"15_CR14","doi-asserted-by":"publisher","first-page":"1183","DOI":"10.1162\/153244303322753625","volume":"3","author":"R. Bekkerman","year":"2003","unstructured":"Bekkerman, R., El-Yaniv, R., Tishby, N., Winter, Y.: Distributional Word Cluster vs. Words for Text Categorization. Journal of Machine Learning Research\u00a03, 1183\u20131208 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"15_CR15","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1016\/S0306-4573(98)00051-X","volume":"35","author":"J. Nie","year":"1999","unstructured":"Nie, J., Ren, F.: Chinese Information Retrieval: Using Characters or Words? Information Processing and Management\u00a035, 443\u2013462 (1999)","journal-title":"Information Processing and Management"},{"key":"15_CR16","doi-asserted-by":"crossref","unstructured":"Zhou, S., Guan, J.: Chinese Documents Classification Based on N-Grams. In: Proceedings of the 3rd International Conference on Computational Linguistics and Intelligent Text Processing, Mexico City, pp. 405\u2013414 (2002)","DOI":"10.1007\/3-540-45715-1_43"},{"key":"15_CR17","unstructured":"Xue, D., Sun, M.: A Study on Feature Weighting in Chinese Text Categorization. In: Proceedings of the 4th International Conference on Computational Linguistics and Intelligent Text Processing, Mexico City, pp. 594\u2013604 (2003)"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Oakes, M., Gaizauskas, R.J., Fowkes, H.: A Method Based on the Chi-Square Test for Document Classification. In: Proceedings of 24th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 440\u2013441 (2001)","DOI":"10.1145\/383952.384080"},{"key":"15_CR19","unstructured":"Luo, S.: Statistic-Based Two-Character Chinese Word Extraction. Master Thesis of Tsinghua University, China (2003)"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-24752-4_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,18]],"date-time":"2023-02-18T03:26:49Z","timestamp":1676690809000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-540-24752-4_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2004]]},"ISBN":["9783540213826","9783540247524"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-24752-4_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2004]]}}}