{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:49:24Z","timestamp":1763347764807,"version":"3.38.0"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2011,5,18]],"date-time":"2011-05-18T00:00:00Z","timestamp":1305676800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2012,4]]},"DOI":"10.1007\/s10115-011-0403-7","type":"journal-article","created":{"date-parts":[[2011,5,17]],"date-time":"2011-05-17T08:06:06Z","timestamp":1305619566000},"page":"23-53","source":"Crossref","is-referenced-by-count":43,"title":["Highly discriminative statistical features for email classification"],"prefix":"10.1007","volume":"31","author":[{"given":"Juan Carlos","family":"Gomez","sequence":"first","affiliation":[]},{"given":"Erik","family":"Boiy","sequence":"additional","affiliation":[]},{"given":"Marie-Francine","family":"Moens","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2011,5,18]]},"reference":[{"key":"403_CR1","doi-asserted-by":"crossref","unstructured":"Abu-Nimeh S, Nappa D, Wang X, Nair S (2007) A comparison of machine learning techniques for phishing detection. In: eCrime \u201907: proceedings of the anti-phishing working groups 2nd annual eCrime researchers summit. ACM, New York, pp 60\u201369","DOI":"10.1145\/1299015.1299021"},{"key":"403_CR2","doi-asserted-by":"crossref","unstructured":"Agrawal R, Imieli\u0144ski T, Swami A (1993) Mining association rules between sets of items in large databases. In: SIGMOD \u201993: proceedings of the 1993 ACM SIGMOD international conference on management of Data. ACM, New York, NY, USA, pp 207\u2013216","DOI":"10.1145\/170035.170072"},{"key":"403_CR3","first-page":"37","volume":"6","author":"DW Aha","year":"1991","unstructured":"Aha DW, Kibler DF, Albert MK (1991) Instance-based learning algorithms. Mach Learn 6: 37\u201366","journal-title":"Mach Learn"},{"key":"403_CR4","unstructured":"Androutsopoulos I, Koutsias J, Chandrinos KV, Ch KV, Paliouras G, Spyropoulos CD (2000) An evaluation of na\u00efve Bayesian anti-spam filtering, pp 9\u201317"},{"issue":"10","key":"403_CR5","doi-asserted-by":"crossref","first-page":"2385","DOI":"10.1162\/089976600300014980","volume":"12","author":"G Baudat","year":"2000","unstructured":"Baudat G, Anouar F (2000) Generalized discriminant analysis using a kernel approach. Neural Comput 12(10): 2385\u20132404","journal-title":"Neural Comput"},{"key":"403_CR6","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780198538493.001.0001","volume-title":"Neural networks for pattern recognition","author":"C Bishop","year":"1995","unstructured":"Bishop C (1995) Neural networks for pattern recognition. Clarendon Press, Oxford"},{"key":"403_CR7","volume-title":"Neural information processing systems","author":"DM Blei","year":"2003","unstructured":"Blei DM, Griffiths TL, Jordan MI, Tenenbaum JB (2003) Hierarchical topic models and the nested Chinese restaurant process. In: Thrun S, Saul LK, Sch\u00f6lkopf B (eds) Neural information processing systems. MIT Press, Cambridge"},{"key":"403_CR8","first-page":"2003","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei DM, Ng AY, Jordan MI, Lafferty J (2003) Latent dirichlet\u00a0allocation. J Mach Learn Res 3: 2003","journal-title":"J Mach Learn Res"},{"key":"403_CR9","doi-asserted-by":"crossref","unstructured":"Borgelt C, Kruse R (2002) Induction of association rules: apriori implementation. In: Proceedings of 15th conference on computational statistics (COMPSTAT 2002). Physica Verlag, Heidelberg, Germany","DOI":"10.1007\/978-3-642-57489-4_59"},{"key":"403_CR10","unstructured":"Brank J, Grobelnik M, Frayling MN, Mladenic D (2002) Feature selection using support vector machines. In: Proceedings of the third international conference on data mining methods and databases for engineering, finance, and other fields, Bologna, Italy, pp 25\u201327"},{"key":"403_CR11","first-page":"2673","volume":"7","author":"A Bratko","year":"2006","unstructured":"Bratko A, Cormack G, Filipic B, Lynam T, Zupan B (2006) Spam filtering using statistical data compression models. J Mach Learn Res 7: 2673\u20132698","journal-title":"J Mach Learn Res"},{"issue":"2","key":"403_CR12","first-page":"123","volume":"24","author":"L Breiman","year":"1996","unstructured":"Breiman L (1996) Bagging predictors. Mach Learn 24(2): 123\u2013140","journal-title":"Mach Learn"},{"key":"403_CR13","unstructured":"Brutlag JD, Meek C (2000) Challenges of the email domain for text classification. In: ICML \u201900: proceedings of the seventeenth international conference on machine learning. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, pp 103\u2013110"},{"key":"403_CR14","doi-asserted-by":"crossref","unstructured":"Cai L, Hofmann T (2003) Text categorization by boosting automatically extracted concepts. In: SIGIR \u201903: proceedings of the 26th annual international ACM SIGIR conference on research and development in information retrievalm, pp 182\u2013189","DOI":"10.1145\/860435.860470"},{"key":"403_CR15","unstructured":"Carreras X, M\u00e1rquez L, Salgado JG (2001) Boosting trees for anti-spam email filtering. In: RANLP-01: 4th international conference on recent advances in natural language processing pp 58\u201364"},{"key":"403_CR16","unstructured":"Chen C, Tian Y, Zhang C (2008) Spam filtering with several novel Bayesian classifiers. In: ICPR \u201908: proceedings of the 19th international conference on pattern recognition, pp 1\u20134"},{"key":"403_CR17","doi-asserted-by":"crossref","unstructured":"Cheng H, Yan X, Han J, wei Hsu C (2007) Discriminative frequent pattern analysis for effective classification. In: IEEE 23rd international conference on data engineering, pp 716\u2013725","DOI":"10.1109\/ICDE.2007.367917"},{"key":"403_CR18","doi-asserted-by":"crossref","unstructured":"Cormack GV (2007) Spam track overview. In: TREC-2007: sixteenth text retrieval conference","DOI":"10.6028\/NIST.SP.500-274.spam-overview"},{"key":"403_CR19","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511801389","volume-title":"An introduction to support vector machines and other Kernel-based learning methods","author":"N Cristianini","year":"2000","unstructured":"Cristianini N, Shawe-Taylor J (2000) An introduction to support vector machines and other Kernel-based learning methods. Cambridge University Press, Cambridge, UK"},{"key":"403_CR20","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9","volume":"41","author":"S Deerwester","year":"1990","unstructured":"Deerwester S, Dumais ST, Furnas GW, Landauer TK, Harshman R (1990) Indexing by latent semantic analysis. J Am Soc Inf Sci 41: 391\u2013407","journal-title":"J Am Soc Inf Sci"},{"key":"403_CR21","doi-asserted-by":"crossref","unstructured":"Fette I, Sadeh N, Tomasic A (2007) Learning to detect phishing emails. In: WWW \u201907: proceedings of the 16th international conference on World Wide Web. ACM, New York, NY, USA, pp 649\u2013656","DOI":"10.1145\/1242572.1242660"},{"key":"403_CR22","volume-title":"Introduction to statistical pattern recognition","author":"K Fukunaga","year":"1990","unstructured":"Fukunaga K (1990) Introduction to statistical pattern recognition. Academic Press, London"},{"issue":"4","key":"403_CR23","doi-asserted-by":"crossref","first-page":"42","DOI":"10.1038\/scientificamerican0405-42","volume":"292","author":"J Goodman","year":"2005","unstructured":"Goodman J, Heckerman D, Rounthwaite R (2005) Stopping spam. Sci Am 292(4): 42\u201388","journal-title":"Sci Am"},{"issue":"1\u20133","key":"403_CR24","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1023\/A:1012487302797","volume":"46","author":"I Guyon","year":"2002","unstructured":"Guyon I, Weston J, Barnhill S, Vapnik V (2002) Gene selection for cancer classification using support vector machines. Mach Lear 46(1\u20133): 389\u2013422","journal-title":"Mach Lear"},{"key":"403_CR25","doi-asserted-by":"crossref","first-page":"10206","DOI":"10.1016\/j.eswa.2009.02.037","volume":"36","author":"TS Guzella","year":"2009","unstructured":"Guzella TS, Caminhas WM (2009) A review of machine learning approaches to spam filtering. Expert Syst Appl 36: 10206\u201310222","journal-title":"Expert Syst Appl"},{"key":"403_CR26","unstructured":"Hartley R, Schaffalizky F (2003) PowerFactorization: 3d reconstruction with missing or uncertain data. In: Australia\u2013Japan advanced workshop on computer vision"},{"key":"403_CR27","doi-asserted-by":"crossref","unstructured":"Hofmann T (1999) Probabilistic latent semantic indexing. In: Uncertainty in artificial intelligence, pp 50\u201357","DOI":"10.1145\/312624.312649"},{"key":"403_CR28","unstructured":"Hovold J (2005) Na\u00efve Bayes spam filtering using word-position-based attributes and length-sensitive classification thresholds. In: NODALIDA \u201905: proceedings of the 15th nordic conference of computational linguistics, pp 78\u201387"},{"issue":"4","key":"403_CR29","doi-asserted-by":"crossref","first-page":"648","DOI":"10.1109\/JPROC.2008.916364","volume":"96","author":"TS Huang","year":"2008","unstructured":"Huang TS, Dagli CK, Rajaram S, Chang EY, Mandel MI, Poliner GE, Ellis DPW (2008) Active learning for interactive multimedia retrieval. Proc IEEE 96(4): 648\u2013667","journal-title":"Proc IEEE"},{"key":"403_CR30","doi-asserted-by":"crossref","unstructured":"Ishii N, Murai T, Yamada T, Bao Y, Suzuki S (2006) Text classification: combining grouping, LSA and knn vs support vector machine. In: \u2018Knowledge-Based Intelligent Information and Engineering Systems\u2019 Vol. 4252, pp. 393\u2013400","DOI":"10.1007\/11893004_51"},{"key":"403_CR31","unstructured":"Istv\u00e1n B, J\u00e1cint S, Andr\u00e1s B (2008) Latent Dirichlet\u00a0allocation in web spam filtering. In: AIRWeb \u201908: proceedings of the 4th international workshop on adversarial information retrieval on the Web\u2019 pp 29\u201332"},{"key":"403_CR32","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-1904-8","volume-title":"Principal component analysis","author":"IT Jolliffe","year":"1986","unstructured":"Jolliffe IT (1986) Principal component analysis. Springer, New York"},{"issue":"6","key":"403_CR33","doi-asserted-by":"crossref","first-page":"1047","DOI":"10.1142\/S0218213007003692","volume":"16","author":"I Kanaris","year":"2007","unstructured":"Kanaris I, Kanaris K, Houvardas I, Stamatatos E (2007) Words versus character n-grams for anti-spam filtering. Int J Artif Intell Tools 16(6): 1047\u20131067","journal-title":"Int J Artif Intell Tools"},{"issue":"3","key":"403_CR34","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1007\/s10115-009-0206-2","volume":"22","author":"I Katakis","year":"2010","unstructured":"Katakis I, Tsoumakas G, Vlahavas I (2010) Tracking recurring contexts using ensemble classifiers: an application to email filtering. Knowl Inf Syst 22(3): 371\u2013391","journal-title":"Knowl Inf Syst"},{"key":"403_CR35","unstructured":"Meyer TA, Whateley B (2004) SpamBayes: effective open-source, Bayesian based, email classification syste. In: CEAS \u201904: proceedings of the first conference on email and anti-spam"},{"key":"403_CR36","volume-title":"Machine learning","author":"TM Mitchell","year":"1997","unstructured":"Mitchell TM (1997) Machine learning. McGraw-Hill Science\/Engineering\/Math, NY"},{"key":"403_CR37","doi-asserted-by":"crossref","unstructured":"Mladeni\u0107 D, Brank J, Grobelnik M, Milic-Frayling N (2004) Feature selection using linear classifier weights: interaction with classification models. In: SIGIR \u201904: proceedings of the 27th annual international ACM SIGIR conference on research and development in information retrieval. ACM, New York, NY, USA pp 234\u2013241","DOI":"10.1145\/1008992.1009034"},{"key":"403_CR38","unstructured":"Moler CB, Stewart GW (1973) An algorithm for generalized matrix eigenvalue problems. SIAM: J Numer Anal (19):241\u2013256"},{"key":"403_CR39","unstructured":"Platt JC (1998) Fast training of SVMs using sequential minimal optimization. In: Schoelkopf B, Burges C, Smola A (eds) Advances in kernel methods-support vector learning. MIT Press, Cambridge, pp 185\u2013208"},{"key":"403_CR40","doi-asserted-by":"crossref","unstructured":"Pu Q, Yang G-W (2006) Short-text classification based on ICA and LSA. In: Advances in neural networks, vol 3972, pp. 265\u2013270","DOI":"10.1007\/11760023_39"},{"issue":"19","key":"403_CR41","doi-asserted-by":"crossref","first-page":"4040","DOI":"10.1016\/j.ins.2007.04.005","volume":"177","author":"T Qian","year":"2007","unstructured":"Qian T, Xiong H, Wang Y, Chen E (2007) On the strength of hyperclique patterns for text categorization. Inf Sci 177(19): 4040\u20134058","journal-title":"Inf Sci"},{"key":"403_CR42","volume-title":"C4.5: programs for machine learning","author":"JR Quinlan","year":"1993","unstructured":"Quinlan JR (1993) C4.5: programs for machine learning. Morgan Kaufmann, San Mateo"},{"key":"403_CR43","unstructured":"Robinson G (2003) A statistical approach to the spam problem. Linux J (107):3"},{"key":"403_CR44","unstructured":"Schneider K-M (2003) A comparison of event models for na\u00efve Bayes anti-spam e-mail filtering. In: EACL \u201903: proceedings of the tenth conference on European chapter of the association for computational linguistics. Association for Computational Linguistics, Morristown, NJ, USA, pp 307\u2013314"},{"key":"403_CR45","doi-asserted-by":"crossref","unstructured":"Siefkes C, Assis F, Chhabra S, Yerazunis WS (2004) Combining winnow and orthogonal sparse bigrams for incremental spam filtering. In: PKDD \u201904: proceedings of the 8th European conference on principles and practice of knowledge discovery in databases, vol 3202. Springer, Morristown, NJ, USA, pp. 410\u2013421","DOI":"10.1007\/978-3-540-30116-5_38"},{"key":"403_CR46","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1007\/s10044-003-0196-8","volume":"6","author":"K Torkkola","year":"2004","unstructured":"Torkkola K (2004) Discriminative features for document classification. Pattern Anal Appl 6: 301\u2013308","journal-title":"Pattern Anal Appl"},{"key":"403_CR47","unstructured":"Tsymbal A, Puuronen S, Pechenizkiy M, Baumgarten M, Patterson DW (2002) Eigenvector-based feature extraction for classification. In: Haller SM, Simmons G (eds) FLAIRS conference. AAAI Press, pp 354\u2013358"},{"key":"403_CR48","doi-asserted-by":"crossref","unstructured":"Wang F, Zhang C (2007) Feature extraction by maximizing the average neighborhood margin. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE Computer Society","DOI":"10.1109\/CVPR.2007.383124"},{"issue":"2","key":"403_CR49","doi-asserted-by":"crossref","first-page":"216","DOI":"10.1214\/aoms\/1177731124","volume":"16","author":"F Waugh","year":"1945","unstructured":"Waugh F (1945) A note concerning hotelling\u2019s method of inverting a partitioned matrix. Ann Math Stat 16(2): 216\u2013217","journal-title":"Ann Math Stat"},{"key":"403_CR50","volume-title":"Data mining: practical machine learning tools and techniques with java implementations","author":"IH Witten","year":"2000","unstructured":"Witten IH, Frank E (2000) Data mining: practical machine learning tools and techniques with java implementations. Morgan Kaufmann, San Francisco"},{"key":"403_CR51","doi-asserted-by":"crossref","unstructured":"Xia Y, Wong K-F (2006) Binarization approaches to email categorization. In: ICCPOL, pp 474\u2013481","DOI":"10.1007\/11940098_50"},{"key":"403_CR52","unstructured":"Xue G-R, Dai W, Yang Q, Yu Y (2008) Topic-bridged pLSA for cross-domain text classification. In: SIGIR \u201908: proceedings of the 31st annual international ACM SIGIR conference on research and development in information retrieval, pp 627\u2013634"},{"issue":"3","key":"403_CR53","doi-asserted-by":"crossref","first-page":"320","DOI":"10.1109\/TKDE.2006.45","volume":"18","author":"J Yan","year":"2006","unstructured":"Yan J, Zhang B, Liu N, Yan S, Cheng Q, Fan W, Yang Q, Xi W, Chen Z (2006) Effective and efficient dimensionality reduction for large-scale and streaming data preprocessing. IEEE Trans Knowl Data Eng 18(3): 320\u2013333","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"4","key":"403_CR54","doi-asserted-by":"crossref","first-page":"355","DOI":"10.1016\/j.knosys.2008.01.001","volume":"21","author":"B Yu","year":"2008","unstructured":"Yu B, Xu Z-b (2008) A comparative study for content-based dynamic spam classification using four machine learning algorithms. Knowledge-Based Syst 21(4): 355\u2013362","journal-title":"Knowledge-Based Syst"},{"key":"403_CR55","doi-asserted-by":"crossref","unstructured":"Zhang Z, Phan X-H, SH (2008) An efficient feature selection using hidden topic in text categorization. In: AINAW \u201908: proceedings of the 22nd international conference on advanced information networking and applications\u2014Workshops, pp 1223\u20131228","DOI":"10.1109\/WAINA.2008.137"},{"key":"403_CR56","doi-asserted-by":"crossref","unstructured":"Zhou S, Li K, Liu Y (2008) Text categorization based on topic model. In: Wang G, Li T, Grzymala-Busse J, Miao D, Skowron A, Yao Y (eds) Rough sets and knowledge technology. Lecture notes in computer science, vol 5009, pp 572\u2013579","DOI":"10.1007\/978-3-540-79721-0_77"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-011-0403-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10115-011-0403-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-011-0403-7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,5]],"date-time":"2025-03-05T15:19:36Z","timestamp":1741187976000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10115-011-0403-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,5,18]]},"references-count":56,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2012,4]]}},"alternative-id":["403"],"URL":"https:\/\/doi.org\/10.1007\/s10115-011-0403-7","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"type":"print","value":"0219-1377"},{"type":"electronic","value":"0219-3116"}],"subject":[],"published":{"date-parts":[[2011,5,18]]}}}