{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T18:54:01Z","timestamp":1768589641474,"version":"3.49.0"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2014,7,3]],"date-time":"2014-07-03T00:00:00Z","timestamp":1404345600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2015,5]]},"DOI":"10.1007\/s10618-014-0358-x","type":"journal-article","created":{"date-parts":[[2014,7,2]],"date-time":"2014-07-02T11:55:54Z","timestamp":1404302154000},"page":"593-625","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["CenKNN: a scalable and effective text classifier"],"prefix":"10.1007","volume":"29","author":[{"given":"Guansong","family":"Pang","sequence":"first","affiliation":[]},{"given":"Huidong","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Shengyi","family":"Jiang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,7,3]]},"reference":[{"issue":"4","key":"358_CR1","doi-asserted-by":"crossref","first-page":"671","DOI":"10.1016\/S0022-0000(03)00025-4","volume":"66","author":"D Achlioptas","year":"2003","unstructured":"Achlioptas D (2003) Database-friendly random projections: Johnson\u2013Lindenstrauss with binary coins. J Comput Syst Sci 66(4):671\u2013687","journal-title":"J Comput Syst Sci"},{"key":"358_CR2","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4614-3223-4","volume-title":"Mining text data","author":"CC Aggarwal","year":"2012","unstructured":"Aggarwal CC, Zhai C (2012) A survey of text classification algorithms. Mining text data. Springer, New York"},{"issue":"1","key":"358_CR3","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1145\/1007730.1007735","volume":"6","author":"GE Batista","year":"2004","unstructured":"Batista GE, Prati RC, Monard MC (2004) A study of the behavior of several methods for balancing machine learning training data. ACM SIGKDD Explor Newslett 6(1):20\u201329","journal-title":"ACM SIGKDD Explor Newslett"},{"issue":"9","key":"358_CR4","doi-asserted-by":"crossref","first-page":"509","DOI":"10.1145\/361002.361007","volume":"18","author":"JL Bentley","year":"1975","unstructured":"Bentley JL (1975) Multidimensional binary search trees used for associative searching. Commun ACM 18(9):509\u2013517","journal-title":"Commun ACM"},{"key":"358_CR5","doi-asserted-by":"crossref","unstructured":"Bingham E, Mannila H (2001) Random projection in dimensionality reduction: applications to image and text data. In: Proceedings of the seventh ACM SIGKDD international conference on knowledge discovery and data mining, pp. 245\u2013250 (2001)","DOI":"10.1145\/502512.502546"},{"issue":"3","key":"358_CR6","first-page":"27","volume":"2","author":"C Chang","year":"2011","unstructured":"Chang C, Lin C (2011) LIBSVM: a library for support vector machines. ACM Trans Intell Syst Technol (TIST) 2(3):27","journal-title":"ACM Trans Intell Syst Technol (TIST)"},{"issue":"2","key":"358_CR7","doi-asserted-by":"crossref","first-page":"360","DOI":"10.1016\/j.patcog.2005.08.016","volume":"40","author":"Y Chen","year":"2007","unstructured":"Chen Y, Hung Y, Yen T, Fuh C (2007) Fast and versatile algorithm for nearest neighbor search based on a lower bound tree. Pattern Recognit 40(2):360\u2013375","journal-title":"Pattern Recognit"},{"key":"358_CR8","unstructured":"Cunningham P, Delany SJ (2007) k-Nearest neighbour classifiers. Dublin: Technical Report UCD-CSI-2007-4"},{"issue":"1","key":"358_CR9","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1007\/s10994-010-5197-4","volume":"81","author":"L Du","year":"2010","unstructured":"Du L, Buntine W, Jin H (2010) A segmented topic model based on the two-parameter Poisson\u2013Dirichlet process. Mach Learn 81(1):5\u201319","journal-title":"Mach Learn"},{"issue":"3","key":"358_CR10","doi-asserted-by":"crossref","first-page":"475","DOI":"10.1007\/s10115-011-0425-1","volume":"31","author":"L Du","year":"2012","unstructured":"Du L, Buntine W, Jin H, Chen C (2012) Sequential latent Dirichlet allocation. Knowl Inf Syst 31(3):475\u2013503","journal-title":"Knowl Inf Syst"},{"key":"358_CR11","first-page":"1289","volume":"3","author":"G Forman","year":"2003","unstructured":"Forman G (2003) An extensive empirical study of feature selection metrics for text classification. J Mach Learn Res 3:1289\u20131305","journal-title":"J Mach Learn Res"},{"key":"358_CR12","doi-asserted-by":"crossref","unstructured":"Guan H, Zhou J, Guo M (2009) A class-feature-centroid classifier for text categorization. In: Proceedings of the 18th international conference on World Wide Web, pp. 201\u2013210 (2009)","DOI":"10.1145\/1526709.1526737"},{"issue":"5","key":"358_CR13","doi-asserted-by":"crossref","first-page":"423","DOI":"10.1007\/s00500-005-0503-y","volume":"10","author":"G Guo","year":"2006","unstructured":"Guo G, Wang H, Bell D, Bi Y, Greer K (2006) Using kNN model for automatic text categorization. Soft Comput 10(5):423\u2013430","journal-title":"Soft Comput"},{"key":"358_CR14","doi-asserted-by":"crossref","unstructured":"Han EH, Karypis G (2000) Centroid-based document classification: analysis and experimental results. In: Proceedings of the 4th European Conference on Principles of Data Mining and Knowledge Discovery, pp. 116\u2013123 (2000)","DOI":"10.1007\/3-540-45372-5_46"},{"key":"358_CR15","doi-asserted-by":"crossref","unstructured":"Han E, Karypis G, Kumar V (2001) Text categorization using weight adjusted k-nearest neighbor classification. In: Proceedings of the 5th Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 53\u201365","DOI":"10.1007\/3-540-45357-1_9"},{"key":"358_CR16","unstructured":"Han X, Li S, Shen Z (2012) A k-NN method for large scale hierarchical text classification at LSHTC3. In: Third Pascal Challenge on Large Scale Hierarchical Text classification (2012)"},{"issue":"9","key":"358_CR17","doi-asserted-by":"crossref","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He H, Garcia EA (2009) Learning from imbalanced data. IEEE Trans Knowl Data Eng 21(9):1263\u20131284","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"2","key":"358_CR18","doi-asserted-by":"crossref","first-page":"364","DOI":"10.1145\/1071610.1071612","volume":"30","author":"HV Jagadish","year":"2005","unstructured":"Jagadish HV, Ooi BC, Tan K, Yu C, Zhang R (2005) iDistance: an adaptive B+-tree based indexing method for nearest neighbor search. ACM Trans Database Syst (TODS) 30(2):364\u2013397","journal-title":"ACM Trans Database Syst (TODS)"},{"issue":"1","key":"358_CR19","doi-asserted-by":"crossref","first-page":"1503","DOI":"10.1016\/j.eswa.2011.08.040","volume":"39","author":"S Jiang","year":"2012","unstructured":"Jiang S, Pang G, Wu M, Kuang L (2012) An improved K-nearest-neighbor algorithm for text categorization. Expert Syst Appl 39(1):1503\u20131509","journal-title":"Expert Syst Appl"},{"key":"358_CR20","unstructured":"Joachims T (1996) A Probabilistic Analysis of the Rocchio Algorithm with TFIDF for Text Categorization. In: Proceedings of the 14th International Conference on Machine Learning, pp. 143\u2013151 (1996)"},{"key":"358_CR21","doi-asserted-by":"crossref","unstructured":"Joachims T (1998) Text categorization with support vector machines: Learning with many relevant features. In: Proceedings of the 10th European Conference on Machine Learning, pp. 137\u2013142","DOI":"10.1007\/BFb0026683"},{"key":"358_CR22","doi-asserted-by":"crossref","unstructured":"Joachims T (2001) A statistical learning learning model of text classification for support vector machines. In: Proceedings of the 24th annual international ACM SIGIR conference on research and development in information retrieval, pp. 128\u2013136 (2001)","DOI":"10.1145\/383952.383974"},{"key":"358_CR23","doi-asserted-by":"crossref","first-page":"369","DOI":"10.1145\/253262.253347","volume":"26","author":"N Katayama","year":"1997","unstructured":"Katayama N, Satoh S (1997) The SR-tree: an index structure for high-dimensional nearest neighbor queries. ACM SIGMOD Rec 26:369\u2013380","journal-title":"ACM SIGMOD Rec"},{"key":"358_CR24","first-page":"37","volume":"6","author":"H Kim","year":"2005","unstructured":"Kim H, Howland P, Park H (2005) Dimension reduction in text classification with support vector machines. J Mach Learn Res 6:37\u201353","journal-title":"J Mach Learn Res"},{"issue":"1","key":"358_CR25","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1145\/1842890.1842894","volume":"44","author":"A Kosmopoulos","year":"2010","unstructured":"Kosmopoulos A, Gaussier E, Paliouras G, Aseervatham S (2010) The ECIR 2010 large scale hierarchical classification workshop. ACM SIGIR Forum 44(1):23\u201332","journal-title":"ACM SIGIR Forum"},{"issue":"5","key":"358_CR26","doi-asserted-by":"crossref","first-page":"628","DOI":"10.1109\/TPAMI.2003.1195997","volume":"25","author":"W Lam","year":"2003","unstructured":"Lam W, Han Y (2003) Automatic textual document categorization based on generalized instance sets and a metamodel. IEEE Trans Pattern Anal Mach Intell 25(5):628\u2013633","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"358_CR27","doi-asserted-by":"crossref","first-page":"721","DOI":"10.1109\/TPAMI.2008.110","volume":"31","author":"M Lan","year":"2009","unstructured":"Lan M, Tan CL, Su J, Lu Y (2009) Supervised and traditional term weighting methods for automatic text categorization. IEEE Trans Pattern Anal Mach Intell 31(4):721\u2013735","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"358_CR28","unstructured":"Lin J, Gunopulos D (2003) Dimensionality reduction by random projection and latent semantic indexing. In: Proceedings of SDM\u20192003 Workshop on Text Mining Workshop (2003)"},{"key":"358_CR29","unstructured":"Liu T, Chen Z, Zhang B, Ma W, Wu G (2004) Improving text classification using local latent semantic indexing. In: Proceedings of the 4th IEEE International Conference on Data Mining, pp. 162\u2013169 (2004)"},{"key":"358_CR30","unstructured":"Mani I, Zhang I (2003) kNN approach to unbalanced data distributions: a case study involving information extraction. In: Proceedings of ICML\u20192003 Workshop on Learning from Imbalanced Datasets (2003)"},{"key":"358_CR31","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511809071","volume-title":"Introduction to information retrieval","author":"CD Manning","year":"2008","unstructured":"Manning CD, Raghavan P, Sch\u00fctze H (2008) Introduction to information retrieval. Cambridge University Press, Cambridge"},{"key":"358_CR32","unstructured":"Miao Y., Qiu X. Hierarchical centroid-based classifier for large scale text classification. In: First Pascal Challenge on Large Scale Hierarchical Text classification (2009)."},{"key":"358_CR33","unstructured":"Moore AW, Hall T (1990) Efficient memory-based learning for robot control. Doctoral dissertation, University of Cambridge (1990)"},{"issue":"2","key":"358_CR34","doi-asserted-by":"crossref","first-page":"576","DOI":"10.1016\/j.ipm.2012.10.003","volume":"49","author":"G Pang","year":"2013","unstructured":"Pang G, Jiang S (2013) A generalized cluster centroid based classifier for text categorization. Inf Process Manag 49(2):576\u2013586","journal-title":"Inf Process Manag"},{"key":"358_CR35","volume-title":"Advanced data mining and applications","author":"G Pang","year":"2013","unstructured":"Pang G, Jiang S, Chen D (2013) A simple integration of social relationship and text data for identifying potential customers in microblogging. Advanced data mining and applications. Springer, Berlin"},{"key":"358_CR36","doi-asserted-by":"crossref","unstructured":"Papadimitriou CH, Tamaki H, Raghavan P, Vempala S (1998) Latent semantic indexing: a probabilistic analysis. In: Proceedings of the 17th ACM SIGACT-SIGMOD-SIGART symposium on principles of database systems, pp. 159\u2013168 (1998)","DOI":"10.1145\/275487.275505"},{"issue":"11","key":"358_CR37","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1145\/361219.361220","volume":"18","author":"G Salton","year":"1975","unstructured":"Salton G, Wong A, Yang C (1975) A vector space model for automatic indexing. Commun ACM 18(11):613\u2013620","journal-title":"Commun ACM"},{"issue":"1","key":"358_CR38","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/505282.505283","volume":"34","author":"F Sebastiani","year":"2002","unstructured":"Sebastiani F (2002) Machine learning in automated text categorization. ACM Comput Surv (CSUR) 34(1):1\u201347","journal-title":"ACM Comput Surv (CSUR)"},{"key":"358_CR39","unstructured":"Sun JT, Chen Z, Zeng HJ, Lu YC, Shi CY, Ma WY (2004) Supervised latent semantic indexing for document categorization. In: Proceedings of the 4th IEEE International Conference on Data Mining, pp. 535\u2013538 (2004)"},{"issue":"4","key":"358_CR40","doi-asserted-by":"crossref","first-page":"687","DOI":"10.1142\/S0218001409007326","volume":"23","author":"Y Sun","year":"2009","unstructured":"Sun Y, Wong AK, Kamel MS (2009) Classification of imbalanced data: a review. Int J Pattern Recog Artif Intell 23(4):687\u2013719","journal-title":"Int J Pattern Recog Artif Intell"},{"issue":"4","key":"358_CR41","doi-asserted-by":"crossref","first-page":"667","DOI":"10.1016\/j.eswa.2004.12.023","volume":"28","author":"S Tan","year":"2005","unstructured":"Tan S (2005) Neighbor-weighted k-nearest neighbor for unbalanced text corpus. Expert Syst Appl 28(4):667\u2013671","journal-title":"Expert Syst Appl"},{"issue":"2","key":"358_CR42","doi-asserted-by":"crossref","first-page":"290","DOI":"10.1016\/j.eswa.2005.07.019","volume":"30","author":"S Tan","year":"2006","unstructured":"Tan S (2006) An effective refinement strategy for KNN text classifier. Expert Syst Appl 30(2):290\u2013298","journal-title":"Expert Syst Appl"},{"key":"358_CR43","doi-asserted-by":"crossref","unstructured":"Tan S, Cheng X (2007) An effective approach to enhance centroid classifier for text categorization. In: Proceedings of the 11th European conference on Principles and Practice of Knowledge Discovery in Databases, pp. 581\u2013588 (2007)","DOI":"10.1007\/978-3-540-74976-9_61"},{"key":"358_CR44","unstructured":"Tang L, Liu H (2005) Bias analysis in text classification for highly skewed data. In: Proceedings of the 5th IEEE International Conference on Data Mining, pp. 781\u2013784 (2005)"},{"key":"358_CR45","doi-asserted-by":"crossref","unstructured":"Vilalta R, Achari M, Eick CF (2003) Class decomposition via clustering: a new framework for low-variance classifiers. In: Proceedings of the 3rd IEEE International Conference on Data Mining, pp. 673\u2013676 (2003)","DOI":"10.1109\/ICDM.2003.1251005"},{"issue":"15","key":"358_CR46","doi-asserted-by":"crossref","first-page":"11880","DOI":"10.1016\/j.eswa.2012.02.068","volume":"39","author":"CH Wan","year":"2012","unstructured":"Wan CH, Lee LH, Rajkumar R, Isa D (2012) A hybrid text classification approach with low dependency on parameter by integrating K-nearest neighbor and support vector machine. Expert Syst Appl 39(15):11880\u201311888","journal-title":"Expert Syst Appl"},{"key":"358_CR47","unstructured":"Wang X, Zhao H, Lu B (2011) Enhance k-nearest neighbour algorithm for large-scale multi-labeled hierarchical classification. In: Second Pascal Challenge on Large Scale Hierarchical Text classification (2011)"},{"key":"358_CR48","doi-asserted-by":"crossref","unstructured":"Wang X, Zhao H, Lu B (2013) A Meta-Top-down Method for Large-scale Hierarchical Classification. IEEE Trans Knowl Data Eng, 99 (2013). doi: 10.1109\/TKDE.2013.30","DOI":"10.1109\/TKDE.2013.30"},{"issue":"1\u20135","key":"358_CR49","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1023\/A:1006593614256","volume":"11","author":"D Wettschereck","year":"1997","unstructured":"Wettschereck D, Aha DW, Mohri T (1997) A review and empirical evaluation of feature weighting methods for a class of lazy learning algorithms. Artif Intell Rev 11(1\u20135):273\u2013314","journal-title":"Artif Intell Rev"},{"issue":"1","key":"358_CR50","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10115-007-0114-2","volume":"14","author":"X Wu","year":"2008","unstructured":"Wu X, Kumar V, Ross Quinlan J, Ghosh J, Yang Q, Motoda H, Mclachlan GJ, Ng A, Liu B, Yu PS (2008) Top 10 algorithms in data mining. Knowl Inf Syst 14(1):1\u201337","journal-title":"Knowl Inf Syst"},{"key":"358_CR51","doi-asserted-by":"crossref","unstructured":"Yang Y (1994) Expert network: effective and efficient learning from human decisions in text categorization and retrieval. In: Proceedings of the 17th annual international ACM SIGIR conference on research and development in information retrieval, pp. 13\u201322 (1994)","DOI":"10.1007\/978-1-4471-2099-5_2"},{"key":"358_CR52","doi-asserted-by":"crossref","unstructured":"Yang Y, Ault T, Pierce T, Lattimer CW (2000) Improving text categorization methods for event tracking. In: Proceedings of the 23rd annual international ACM SIGIR conference on research and development in information retrieval, pp. 65\u201372","DOI":"10.1145\/345508.345550"},{"key":"358_CR53","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1007\/978-3-642-03040-6_7","volume":"5507","author":"H Yang","year":"2009","unstructured":"Yang H, King I (2009) Sprinkled latent semantic indexing for text classification with background knowledge. Lect Notes Comput Sci 5507:53\u201360","journal-title":"Lect Notes Comput Sci"},{"key":"358_CR54","doi-asserted-by":"crossref","unstructured":"Yang Y, Liu X (1999) A re-examination of text categorization methods. In: Proceedings of the 22nd annual international ACM SIGIR conference on research and development in information retrieval, pp. 42\u201349","DOI":"10.1145\/312624.312647"},{"key":"358_CR55","unstructured":"Yang Y, Pedersen JO (1997) A comparative study on feature selection in text categorization. In: Proceedings of the 14th International Conference on Machine Learning, pp. 412\u2013420"},{"issue":"7","key":"358_CR56","doi-asserted-by":"crossref","first-page":"2038","DOI":"10.1016\/j.patcog.2006.12.019","volume":"40","author":"M Zhang","year":"2007","unstructured":"Zhang M, Zhou Z (2007) ML-KNN: a lazy learning approach to multi-label learning. Pattern Recognit 40(7):2038\u20132048","journal-title":"Pattern Recognit"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-014-0358-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10618-014-0358-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-014-0358-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,30]],"date-time":"2019-05-30T19:29:45Z","timestamp":1559244585000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10618-014-0358-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,7,3]]},"references-count":56,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2015,5]]}},"alternative-id":["358"],"URL":"https:\/\/doi.org\/10.1007\/s10618-014-0358-x","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,7,3]]}}}