{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T04:31:42Z","timestamp":1759206702190,"version":"3.37.3"},"reference-count":100,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2019,4,20]],"date-time":"2019-04-20T00:00:00Z","timestamp":1555718400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100003130","name":"Fonds Wetenschappelijk Onderzoek","doi-asserted-by":"publisher","award":["G031914N"],"award-info":[{"award-number":["G031914N"]}],"id":[{"id":"10.13039\/501100003130","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Data Sci Anal"],"published-print":{"date-parts":[[2020,3]]},"DOI":"10.1007\/s41060-019-00185-1","type":"journal-article","created":{"date-parts":[[2019,4,20]],"date-time":"2019-04-20T14:02:37Z","timestamp":1555768957000},"page":"131-173","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["A benchmarking study of classification techniques for behavioral data"],"prefix":"10.1007","volume":"9","author":[{"given":"Sofie","family":"De Cnudde","sequence":"first","affiliation":[]},{"given":"David","family":"Martens","sequence":"additional","affiliation":[]},{"given":"Theodoros","family":"Evgeniou","sequence":"additional","affiliation":[]},{"given":"Foster","family":"Provost","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,4,20]]},"reference":[{"key":"185_CR1","first-page":"1111","volume":"15","author":"A Agarwal","year":"2014","unstructured":"Agarwal, A., Chapelle, O., Dud\u00edk, M., Langford, J.: A reliable effective terascale linear learning system. J. Mach. Learn. Res. 15, 1111\u20131133 (2014)","journal-title":"J. Mach. Learn. Res."},{"unstructured":"Bannur, S.N.: Detecting malicious webpages using content based classification. Master\u2019s thesis, University of California, San Diego (2011)","key":"185_CR2"},{"doi-asserted-by":"crossref","unstructured":"Bennett, J., Lanning, S.: The netflix prize. In: Proceedings of 2007 KDD Cup and Workshop (2007)","key":"185_CR3","DOI":"10.1145\/1345448.1345459"},{"key":"185_CR4","doi-asserted-by":"crossref","first-page":"140","DOI":"10.1016\/j.knosys.2013.10.016","volume":"55","author":"P Bermejo","year":"2014","unstructured":"Bermejo, P., G\u00e1mez, J.A., Puerta, J.M.: Speeding up incremental wrapper feature subset selection with Naive Bayes classifier. Knowl. Based Syst. 55, 140\u2013147 (2014)","journal-title":"Knowl. Based Syst."},{"doi-asserted-by":"crossref","unstructured":"Bottou, L.: Large-scale machine learning with stochastic gradient descent. In: International Conference on Computational Statistics (COMPSTAT), pp. 177\u2013186. Springer (2010)","key":"185_CR5","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"185_CR6","doi-asserted-by":"crossref","first-page":"62","DOI":"10.1007\/3-540-45681-3_6","volume-title":"Principles of Data Mining and Knowledge Discovery","author":"Damien Brain","year":"2002","unstructured":"Brain, D., Webb, G.I.: The need for low bias algorithms in classification learning from large data sets. In: Elomaa, T., Mannila, H., Toivonen, H. (eds.) Principles of Data Mining and Knowledge Discovery. PKDD 2002. Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence), vol. 2431. Springer, Berlin, Heidelberg (2002)"},{"unstructured":"Brozovsky, L., Petricek, V.: Recommender system for online dating service. In: Znalosti Conference, pp. 1\u201312 (2007)","key":"185_CR7"},{"issue":"17","key":"185_CR8","doi-asserted-by":"crossref","first-page":"3067","DOI":"10.1016\/j.ins.2010.03.025","volume":"180","author":"L Cao","year":"2010","unstructured":"Cao, L.: In-depth behavior understanding and use: the behavior informatics approach. Inf. Sci. 180(17), 3067\u20133085 (2010)","journal-title":"Inf. Sci."},{"doi-asserted-by":"crossref","unstructured":"Cha, M., Mislove, A., Gummadi, K.P.: A measurement-driven analysis of information propagation in the Flickr social network. In: International Conference on World Wide Web (WWW), pp. 721\u2013730. ACM (2009)","key":"185_CR9","DOI":"10.1145\/1526709.1526806"},{"issue":"3","key":"185_CR10","first-page":"27","volume":"2","author":"CC Chang","year":"2011","unstructured":"Chang, C.C., Lin, C.J.: LIBSVM: a library for support vector machines. ACM Trans. Intell. Syst. Technol. (TIST) 2(3), 27 (2011)","journal-title":"ACM Trans. Intell. Syst. Technol. (TIST)"},{"key":"185_CR11","first-page":"2935","volume":"11","author":"F Chang","year":"2010","unstructured":"Chang, F., Guo, C.Y., Lin, X.R., Lu, C.J.: Tree decomposition for large-scale SVM problems. J. Mach. Learn. Res. 11, 2935\u20132972 (2010)","journal-title":"J. Mach. Learn. Res."},{"doi-asserted-by":"crossref","unstructured":"Chen, Y., Pavlov, D., Canny, J.F.: Large-scale behavioral targeting. In: International Conference on Knowledge Discovery and Data Mining (SIGKDD), pp. 209\u2013218. ACM (2009)","key":"185_CR12","DOI":"10.1145\/1557019.1557048"},{"key":"185_CR13","doi-asserted-by":"publisher","DOI":"10.1007\/s10618-019-00616-4","author":"J Clark","year":"2019","unstructured":"Clark, J., Provost, F.: Unsupervised dimensionality reduction versus supervised regularization for classification from sparse data. Data Min. Knowl. Discov. (2019). \nhttps:\/\/doi.org\/10.1007\/s10618-019-00616-4","journal-title":"Data Min. Knowl. Discov."},{"doi-asserted-by":"crossref","unstructured":"Colas, F., Brazdil, P.: Comparison of SVM and some older classification algorithms in text classification tasks. In: Artificial Intelligence in Theory and Practice, pp. 169\u2013178 (2006)","key":"185_CR14","DOI":"10.1007\/978-0-387-34747-9_18"},{"key":"185_CR15","first-page":"1687","volume":"7","author":"R Collobert","year":"2006","unstructured":"Collobert, R., Sinz, F., Weston, J., Bottou, L.: Large scale transductive SVMs. J. Mach. Learn. Res. 7, 1687\u20131712 (2006)","journal-title":"J. Mach. Learn. Res."},{"issue":"3","key":"185_CR16","first-page":"273","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes, C., Vapnik, V.: Support-vector networks. Mach. Learn. 20(3), 273\u2013297 (1995)","journal-title":"Mach. Learn."},{"issue":"2","key":"185_CR17","doi-asserted-by":"crossref","first-page":"110","DOI":"10.1089\/big.2013.0010","volume":"1","author":"B Dalessandro","year":"2013","unstructured":"Dalessandro, B.: Bring the noise: embracing randomness is the key to scaling up machine learning algorithms. Big Data 1(2), 110\u2013112 (2013)","journal-title":"Big Data"},{"doi-asserted-by":"crossref","unstructured":"Dalessandro, B., Chen, D., Raeder, T., Perlich, C., Han Williams, M., Provost, F.: Scalable hands-free transfer learning for online advertising. In: International Conference on Knowledge Discovery and Data Mining (SIGKDD), pp. 1573\u20131582. ACM (2014)","key":"185_CR18","DOI":"10.1145\/2623330.2623349"},{"issue":"1","key":"185_CR19","doi-asserted-by":"crossref","first-page":"49","DOI":"10.3233\/FI-2010-216","volume":"98","author":"KW Bock De","year":"2010","unstructured":"De Bock, K.W., Van den Poel, D.: Predicting website audience demographics for web advertising targeting using multi-website clickstream data. Fundam. Inf. 98(1), 49\u201370 (2010)","journal-title":"Fundam. Inf."},{"key":"185_CR20","doi-asserted-by":"crossref","first-page":"74","DOI":"10.1016\/j.dss.2015.03.004","volume":"73","author":"S Cnudde De","year":"2015","unstructured":"De Cnudde, S., Martens, D.: Loyal to your city? A data mining analysis of a public service loyalty program. Decis. Support Syst. 73, 74\u201384 (2015)","journal-title":"Decis. Support Syst."},{"unstructured":"De Cnudde, S., Moeyersoms, J., Stankova, M., Tobback, E., Javaly, V., Martens, D.: Who cares about your Facebook friends? Credit scoring for microfinance. Technical report, Department of Applied Economics, Antwerp University, Belgium (2015)","key":"185_CR21"},{"key":"185_CR22","first-page":"1","volume":"7","author":"J Dem\u0161ar","year":"2006","unstructured":"Dem\u0161ar, J.: Statistical comparisons of classifiers over multiple data sets. J. Mach. Learn. Res. 7, 1\u201330 (2006)","journal-title":"J. Mach. Learn. Res."},{"doi-asserted-by":"crossref","unstructured":"Do, T.N., Lenca, P., Lallich, S., Pham, N.K.: Classifying very-high-dimensional data with random forests of oblique decision trees. In: EGC (Best of Volume), pp. 39\u201355. Springer (2009)","key":"185_CR23","DOI":"10.1007\/978-3-642-00580-0_3"},{"unstructured":"Donoho, D.L.: High-dimensional data analysis: the curses and blessings of dimensionality. In: AMS Conference on Math Challenges of the 21st Century, pp. 1\u201332 (2000)","key":"185_CR24"},{"doi-asserted-by":"crossref","unstructured":"Dumais, S., Platt, J., Heckerman, D., Sahami, M.: Inductive learning algorithms and representations for text categorization. In: Proceedings of the Seventh International Conference on Information and Knowledge Management, pp. 148\u2013155. ACM (1998)","key":"185_CR25","DOI":"10.1145\/288627.288651"},{"key":"185_CR26","first-page":"1871","volume":"9","author":"RE Fan","year":"2008","unstructured":"Fan, R.E., Chang, K.W., Hsieh, C.J., Wang, X.R., Lin, C.J.: LIBLINEAR: a library for large linear classification. J. Mach. Learn. Res. 9, 1871\u20131874 (2008)","journal-title":"J. Mach. Learn. Res."},{"issue":"8","key":"185_CR27","doi-asserted-by":"crossref","first-page":"861","DOI":"10.1016\/j.patrec.2005.10.010","volume":"27","author":"T Fawcett","year":"2006","unstructured":"Fawcett, T.: An introduction to ROC analysis. Pattern Recognit. Lett. 27(8), 861\u2013874 (2006)","journal-title":"Pattern Recognit. Lett."},{"issue":"3","key":"185_CR28","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1023\/A:1009700419189","volume":"1","author":"T Fawcett","year":"1997","unstructured":"Fawcett, T., Provost, F.: Adaptive fraud detection. Data Min. Knowl. Discov. 1(3), 291\u2013316 (1997)","journal-title":"Data Min. Knowl. Discov."},{"key":"185_CR29","first-page":"3133","volume":"15","author":"M Fern\u00e1ndez-Delgado","year":"2014","unstructured":"Fern\u00e1ndez-Delgado, M., Cernadas, E., Barro, S., Amorim, D.: Do we need hundreds of classifiers to solve real world classification problems? J. Mach. Learn. Res. 15, 3133\u20133181 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"185_CR30","first-page":"1289","volume":"3","author":"G Forman","year":"2003","unstructured":"Forman, G.: An extensive empirical study of feature selection metrics for text classification. J. Mach. Learn. Res. 3, 1289\u20131305 (2003)","journal-title":"J. Mach. Learn. Res."},{"doi-asserted-by":"crossref","unstructured":"Forman, G., Scholz, M., Rajaram, S.: Feature shaping for linear SVM classifiers. In: Proceedings of the 15th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 299\u2013308. ACM (2009)","key":"185_CR31","DOI":"10.1145\/1557019.1557057"},{"issue":"4","key":"185_CR32","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1089\/big.2013.0037","volume":"1","author":"E Junqu\u00e9 de Fortuny","year":"2013","unstructured":"Junqu\u00e9 de Fortuny, E., Martens, D., Provost, F.: Predictive modeling with big data: is bigger really better? Big Data 1(4), 215\u2013226 (2013)","journal-title":"Big Data"},{"issue":"6","key":"185_CR33","doi-asserted-by":"publisher","first-page":"1013","DOI":"10.1007\/s10994-018-5699-z","volume":"107","author":"Enric Junqu\u00e9 de Fortuny","year":"2018","unstructured":"Junqu\u00e9 de Fortuny, E., Martens, D., Provost, F.: Wallenius bayes. Mach. Learn. 107, 1013 (2018). \nhttps:\/\/doi.org\/10.1007\/s10994-018-5699-z","journal-title":"Machine Learning"},{"doi-asserted-by":"crossref","unstructured":"Junqu\u00e9\u00a0de Fortuny, E., Stankova, M., Moeyersoms, J., Minnaert, B., Provost, F., Martens, D.: Corporate residence fraud detection. In: International Conference on Knowledge Discovery and Data Mining (SIGKDD), pp. 1650\u20131659. ACM (2014)","key":"185_CR34","DOI":"10.1145\/2623330.2623333"},{"doi-asserted-by":"crossref","unstructured":"Junqu\u00e9\u00a0de Fortuny, E., Evgeniou, T., Martens, D., Provost, F.: Iteratively refining SVMs using priors. In: International Conference on Big Data (Big Data), pp. 46\u201352. IEEE (2015)","key":"185_CR35","DOI":"10.1109\/BigData.2015.7363740"},{"issue":"1","key":"185_CR36","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1023\/A:1009778005914","volume":"1","author":"JH Friedman","year":"1997","unstructured":"Friedman, J.H.: On bias, variance, 0\/1-loss, and the curse of dimensionality. Data Min. Knowl. Discov. 1(1), 55\u201377 (1997)","journal-title":"Data Min. Knowl. Discov."},{"issue":"4","key":"185_CR37","doi-asserted-by":"crossref","first-page":"650","DOI":"10.1037\/0033-295X.103.4.650","volume":"103","author":"G Gigerenzer","year":"1996","unstructured":"Gigerenzer, G., Goldstein, D.G.: Reasoning the fast and frugal way: models of bounded rationality. Psychol. Rev. 103(4), 650 (1996)","journal-title":"Psychol. Rev."},{"unstructured":"Gigerenzer, G., Todd, P.M., ABC Research Group, et al.: Simple Heuristics that Make Us Smart. Oxford University Press, Oxford (1999)","key":"185_CR38"},{"unstructured":"Goel, S., Hofman, J.M., Sirer, M.I.: Who does what on the web: a large-scale study of browsing behavior. In: International Conference on Web and Social Media (ICWSM), AAAI (2012)","key":"185_CR39"},{"issue":"8","key":"185_CR40","doi-asserted-by":"crossref","first-page":"1678","DOI":"10.1016\/j.jbusres.2015.03.026","volume":"68","author":"KC Green","year":"2015","unstructured":"Green, K.C., Armstrong, J.S.: Simple versus complex forecasting: the evidence. J. Bus. Res. 68(8), 1678\u20131685 (2015)","journal-title":"J. Bus. Res."},{"key":"185_CR41","first-page":"1157","volume":"3","author":"I Guyon","year":"2003","unstructured":"Guyon, I., Elisseeff, A.: An introduction to variable and feature selection. J. Mach. Learn. Res. 3, 1157\u20131182 (2003)","journal-title":"J. Mach. Learn. Res."},{"issue":"3","key":"185_CR42","first-page":"385","volume":"69","author":"DJ Hand","year":"2001","unstructured":"Hand, D.J., Yu, K.: Idiot\u2019s Bayes\u2014not so stupid after all? Int. Stat. Rev. 69(3), 385\u2013398 (2001)","journal-title":"Int. Stat. Rev."},{"key":"185_CR43","volume-title":"Information Retrieval: Computational and Theoretical Aspects","author":"HS Heaps","year":"1978","unstructured":"Heaps, H.S.: Information Retrieval: Computational and Theoretical Aspects. Academic Press Inc., London (1978)"},{"key":"185_CR44","doi-asserted-by":"crossref","first-page":"256","DOI":"10.1214\/088342306000000222","volume":"21","author":"S Hill","year":"2006","unstructured":"Hill, S., Provost, F., Volinsky, C.: Network-based marketing: identifying likely adopters via consumer networks. Stat. Sci. 21, 256\u2013276 (2006)","journal-title":"Stat. Sci."},{"unstructured":"Hsu, C.W., Chang, C.C., Lin, C.J.: A practical guide to support vector classification. Technical report, National Taiwan University, Taipei, Taiwan (2003)","key":"185_CR45"},{"issue":"1","key":"185_CR46","doi-asserted-by":"crossref","first-page":"47","DOI":"10.1023\/B:APIN.0000047383.53680.b6","volume":"22","author":"X Hu","year":"2005","unstructured":"Hu, X.: A data mining approach for retailing bank customer attrition analysis. Appl. Intell. 22(1), 47\u201360 (2005)","journal-title":"Appl. Intell."},{"unstructured":"Huang, H.S., Lin, K.L., Hsu, J.Y.J., Hsu, C.N.: Item-triggered recommendation for identifying potential customers of cold sellers in supermarkets. In: Beyond Personalization Workshop on the Next Stage of Recommender Systems Research, pp. 37\u201342 (2005)","key":"185_CR47"},{"unstructured":"Huang, J., Lu, J., Ling, C.X. Comparing Naive Bayes, decision trees, and SVM with AUC and accuracy. In: International Conference on Data Mining (ICDM), pp. 553\u2013556. IEEE (2003)","key":"185_CR48"},{"issue":"6","key":"185_CR49","doi-asserted-by":"crossref","first-page":"571","DOI":"10.1080\/03610928008827904","volume":"9","author":"RL Iman","year":"1980","unstructured":"Iman, R.L., Davenport, J.M.: Approximations of the critical region of the Friedman statistic. Commun. Stat. Theory Methods 9(6), 571\u2013595 (1980)","journal-title":"Commun. Stat. Theory Methods"},{"key":"185_CR50","volume-title":"Text Categorization with Support Vector Machines: Learning with Many Relevant Features","author":"T Joachims","year":"1998","unstructured":"Joachims, T.: Text Categorization with Support Vector Machines: Learning with Many Relevant Features. Springer, Berlin (1998)"},{"issue":"3","key":"185_CR51","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1080\/08839519508945477","volume":"9","author":"RD King","year":"1995","unstructured":"King, R.D., Feng, C., Sutherland, A.: Statlog: comparison of classification algorithms on large real-world problems. Appl. Artif. Intell. 9(3), 289\u2013333 (1995)","journal-title":"Appl. Artif. Intell."},{"key":"185_CR52","first-page":"1137","volume":"14","author":"R Kohavi","year":"1995","unstructured":"Kohavi, R.: A study of cross-validation and bootstrap for accuracy estimation and model selection. Int. Joint Conf. Artif. Intell. (IJCAI) 14, 1137\u20131145 (1995)","journal-title":"Int. Joint Conf. Artif. Intell. (IJCAI)"},{"issue":"15","key":"185_CR53","doi-asserted-by":"crossref","first-page":"5802","DOI":"10.1073\/pnas.1218772110","volume":"110","author":"M Kosinski","year":"2013","unstructured":"Kosinski, M., Stillwell, D., Graepel, T.: Private traits and attributes are predictable from digital records of human behavior. Natl. Acad. Sci. 110(15), 5802\u20135805 (2013)","journal-title":"Natl. Acad. Sci."},{"unstructured":"Langford, J., Li, L., Strehl, A.: Vowpal Wabbit online learning project. Technical report. \nhttp:\/\/hunch.net\/~vw\/\n\n (2007). Accessed 10 Apr 2019","key":"185_CR54"},{"unstructured":"Langley, P., Iba, W., Thompson, K.: An analysis of Bayesian classifiers. In: National Conference on Artificial Intelligence, AAAI, vol. 90, pp. 223\u2013228 (1992)","key":"185_CR55"},{"issue":"1","key":"185_CR56","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.dss.2012.02.002","volume":"54","author":"K Li","year":"2012","unstructured":"Li, K., Du, T.C.: Building a targeted mobile advertising system for location-based services. Decis. Support Syst. 54(1), 1\u20138 (2012)","journal-title":"Decis. Support Syst."},{"unstructured":"Li, P., Owen, A., Zhang, C.H.: One permutation hashing. In: Pereira, F., Burges, C.J.C., Bottou, L., Weinberger, K.Q. (eds.) Advances in Neural Information Processing Systems, vol. 25, pp. 3113\u20133121. \nhttp:\/\/papers.nips.cc\/paper\/4778-one-permutation-hashing\n\n (2012)","key":"185_CR57"},{"unstructured":"Li, X., Wang, H., Gu, B., Ling, C.X.: Data sparseness in linear SVM. In: International Conference on Artificial Intelligence, AAAI, pp. 3628\u20133634 (2015)","key":"185_CR58"},{"issue":"3","key":"185_CR59","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1023\/A:1007608224229","volume":"40","author":"TS Lim","year":"2000","unstructured":"Lim, T.S., Loh, W.Y., Shih, Y.S.: A comparison of prediction accuracy, complexity, and training time of thirty-three old and new classification algorithms. Mach. Learn. 40(3), 203\u2013228 (2000)","journal-title":"Mach. Learn."},{"key":"185_CR60","first-page":"627","volume":"9","author":"CJ Lin","year":"2008","unstructured":"Lin, C.J., Weng, R.C., Keerthi, S.: Trust region Newton method for logistic regression. J. Mach. Learn. Res. 9, 627\u2013650 (2008)","journal-title":"J. Mach. Learn. Res."},{"unstructured":"Liu, A., Ghosh, J., Martin, C.: Generative oversampling for mining imbalanced datasets. In: International Conference on Data Mining (ICDM), pp. 66\u201372. IEEE (2007)","key":"185_CR61"},{"doi-asserted-by":"crossref","unstructured":"Liu, J., Dolan, P., Pedersen, E.R.: Personalized news recommendation based on click behavior. In: International Conference on Intelligent User Interfaces (IUI), pp. 31\u201340. ACM (2010)","key":"185_CR62","DOI":"10.1145\/1719970.1719976"},{"issue":"1","key":"185_CR63","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1016\/j.ins.2013.08.059","volume":"261","author":"N Maci\u00e0","year":"2014","unstructured":"Maci\u00e0, N., Bernad\u00f3-Mansilla, E.: Towards UCI+: a mindful repository design. Inf. Sci. 261(1), 237\u2013262 (2014)","journal-title":"Inf. Sci."},{"issue":"3","key":"185_CR64","doi-asserted-by":"crossref","first-page":"1054","DOI":"10.1016\/j.patcog.2012.09.022","volume":"46","author":"N Maci\u00e0","year":"2013","unstructured":"Maci\u00e0, N., Bernad\u00f3-Mansilla, E., Orriols-Puig, A., Ho, T.K.: Learner excellence biased by data set selection: a case for data characterisation and artificial data sets. Pattern Recognit. 46(3), 1054\u20131066 (2013)","journal-title":"Pattern Recognit."},{"key":"185_CR65","first-page":"935","volume":"8","author":"SA Macskassy","year":"2007","unstructured":"Macskassy, S.A., Provost, F.: Classification in networked data: a toolkit and a univariate case study. J. Mach. Learn. Res. 8, 935\u2013983 (2007)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"185_CR66","doi-asserted-by":"crossref","first-page":"73","DOI":"10.25300\/MISQ\/2014\/38.1.04","volume":"38","author":"D Martens","year":"2014","unstructured":"Martens, D., Provost, F.: Explaining data-driven document classifications. MIS Q. 38(1), 73\u2013100 (2014)","journal-title":"MIS Q."},{"issue":"4","key":"185_CR67","doi-asserted-by":"crossref","first-page":"869","DOI":"10.25300\/MISQ\/2016\/40.4.04","volume":"40","author":"D Martens","year":"2016","unstructured":"Martens, D., Provost, F., Clark, J., Junqu\u00e9 de Fortuny, E.: Mining massive fine-grained behavior data to improve predictive analytics. Manag. Inf. Syst. Q. (MISQ) 40(4), 869\u2013888 (2016)","journal-title":"Manag. Inf. Syst. Q. (MISQ)"},{"unstructured":"McCallum, A., Nigam, K.: A comparison of event models for Naive Bayes text classification. In: Workshop on Learning for Text Categorization, AAAI, pp. 41\u201348 (1998)","key":"185_CR68"},{"unstructured":"Metsis, V., Androutsopoulos, I., Paliouras, G.: Spam filtering with Naive Bayes-which Naive Bayes? In: CEAS, vol. 17, pp. 28\u201369 (2006)","key":"185_CR69"},{"unstructured":"Meyer, D., Leisch, F., Hornik, K.: Benchmarking support vector machines. Technical Report, Adaptive Information Systems and Modelling in Economics and Management Science, WU Vienna University of Economics and Business Administration, Austria (2002)","key":"185_CR70"},{"key":"185_CR71","volume-title":"Machine Learning, Neural and Statistical Classification","author":"D Michie","year":"2009","unstructured":"Michie, D., Spiegelhalter, D.J., Taylor, C.C.: Machine Learning, Neural and Statistical Classification. Overseas Press, New Delhi (2009)"},{"doi-asserted-by":"crossref","unstructured":"Ng, A.Y.: Feature selection, L1 vs. L2 regularization, and rotational invariance. In: International Conference on Machine Learning (ICML). ACM (2004)","key":"185_CR72","DOI":"10.1145\/1015330.1015435"},{"unstructured":"Ng, A.Y., Jordan, A.: On discriminative vs. generative classifiers: a comparison of logistic regression and Naive Bayes. In: Dietterich, T.G., Becker, S., Ghahramani, Z. (eds.) Advances in Neural Information Processing Systems (NIPS), vol. 14, p. 841 (2002). \nhttps:\/\/papers.nips.cc\/paper\/2020-on-discriminative-vs-generative-classifiers-a-comparison-of-logistic-regression-and-naive-bayes","key":"185_CR73"},{"unstructured":"Nie, F., Huang, Y., Wang, X., Huang, H.: New primal SVM solver with linear computational cost for big data classifications. In: International Conference on Machine Learning (ICML). ACM (2014)","key":"185_CR74"},{"doi-asserted-by":"crossref","unstructured":"Pandey, S., Aly, M., Bagherjeiran, A., Hatch, A., Ciccolo, P., Ratnaparkhi, A., Zinkevich, M.: Learning to target: what works for behavioral targeting. In: International Conference on Information and Knowledge Management (CIKM), pp. 1805\u20131814. ACM (2011)","key":"185_CR75","DOI":"10.1145\/2063576.2063837"},{"key":"185_CR76","first-page":"211","volume":"4","author":"C Perlich","year":"2003","unstructured":"Perlich, C., Provost, F., Simonoff, J.S.: Tree induction vs. logistic regression: a learning-curve analysis. J. Mach. Learn. Res. 4, 211\u2013255 (2003)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"185_CR77","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1007\/s10994-013-5375-2","volume":"95","author":"C Perlich","year":"2014","unstructured":"Perlich, C., Dalessandro, B., Raeder, T., Stitelman, O., Provost, F.: Machine learning for targeted display advertising: transfer learning in action. Mach. Learn. 95(1), 103\u2013127 (2014)","journal-title":"Mach. Learn."},{"key":"185_CR78","volume-title":"Data Science for Business: What You Need to Know about Data Mining and Data-Analytic Thinking","author":"F Provost","year":"2013","unstructured":"Provost, F., Fawcett, T.: Data Science for Business: What You Need to Know about Data Mining and Data-Analytic Thinking. O\u2019Reilly Media Inc., Newton (2013)"},{"issue":"2","key":"185_CR79","doi-asserted-by":"crossref","first-page":"131","DOI":"10.1023\/A:1009876119989","volume":"3","author":"F Provost","year":"1999","unstructured":"Provost, F., Kolluri, V.: A survey of methods for scaling up inductive algorithms. Data Min. Knowl. Discov. 3(2), 131\u2013169 (1999)","journal-title":"Data Min. Knowl. Discov."},{"unstructured":"Provost, F., Fawcett, T., Kohavi, R.: The case against accuracy estimation for comparing induction algorithms. In: International Conference on Machine Learning (ICML), pp. 445\u2013453. ACM (1998)","key":"185_CR80"},{"doi-asserted-by":"crossref","unstructured":"Ralaivola, L., d\u2019Alch\u00e9 Buc, F.: Incremental support vector machine learning: a local approach. In: International Conference on Artificial Neural Networks (ICANN), pp. 322\u2013330. Springer (2001)","key":"185_CR81","DOI":"10.1007\/3-540-44668-0_46"},{"key":"185_CR82","first-page":"474","volume":"3230","author":"KM Schneider","year":"2004","unstructured":"Schneider, K.M.: On word frequency information and negative evidence in Naive Bayes text classification. EsTAL 3230, 474\u2013486 (2004)","journal-title":"EsTAL"},{"issue":"1","key":"185_CR83","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/505282.505283","volume":"34","author":"F Sebastiani","year":"2002","unstructured":"Sebastiani, F.: Machine learning in automated text categorization. ACM Comput. Surv. (CSUR) 34(1), 1\u201347 (2002)","journal-title":"ACM Comput. Surv. (CSUR)"},{"issue":"2","key":"185_CR84","first-page":"111","volume":"6","author":"JW Shavlik","year":"1991","unstructured":"Shavlik, J.W., Mooney, R.J., Towell, G.G.: Symbolic and neural learning algorithms: an experimental comparison. Mach. Learn. 6(2), 111\u2013143 (1991)","journal-title":"Mach. Learn."},{"key":"185_CR85","first-page":"57","volume":"29","author":"G Shmueli","year":"2016","unstructured":"Shmueli, G.: Analyzing behavioral big data: methodological, practical, ethical, and moral issues. Qual. Eng. 29, 57\u201374 (2016)","journal-title":"Qual. Eng."},{"issue":"1","key":"185_CR86","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1111\/j.0039-3193.2004.00109.x","volume":"58","author":"B Sigurd","year":"2004","unstructured":"Sigurd, B., Eeg-Olofsson, M., Van Weijer, J.: Word length, sentence length and frequency-Zipf revisited. Stud. Linguist. 58(1), 37\u201352 (2004)","journal-title":"Stud. Linguist."},{"unstructured":"Stankova, M., Martens, D., Provost, F.: Classification over bipartite graphs through projection. Technical Report, Department of Applied Economics, Antwerp University, Belgium (2014)","key":"185_CR87"},{"key":"185_CR88","first-page":"1371","volume":"15","author":"M Tan","year":"2014","unstructured":"Tan, M., Tsang, I.W., Wang, L.: Towards ultrahigh dimensional feature selection for big data. J. Mach. Learn. Res. 15, 1371\u20131429 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"185_CR89","first-page":"363","volume":"6","author":"IW Tsang","year":"2005","unstructured":"Tsang, I.W., Kwok, J.T., Cheung, P.M.: Core vector machines: fast SVM training on very large data sets. J. Mach. Learn. Res. 6, 363\u2013392 (2005)","journal-title":"J. Mach. Learn. Res."},{"issue":"3","key":"185_CR90","doi-asserted-by":"crossref","first-page":"431","DOI":"10.1016\/j.asoc.2013.09.017","volume":"14","author":"W Verbeke","year":"2014","unstructured":"Verbeke, W., Martens, D., Baesens, B.: Social network analysis for customer churn prediction. Appl. Soft Comput. 14(3), 431\u2013446 (2014)","journal-title":"Appl. Soft Comput."},{"key":"185_CR91","doi-asserted-by":"crossref","first-page":"560","DOI":"10.1016\/j.mex.2016.10.004","volume":"3","author":"T Walker","year":"2016","unstructured":"Walker, T.: So much data, so little time: using sequential data analysis to monitor behavioral changes. MethodsX 3, 560\u2013568 (2016)","journal-title":"MethodsX"},{"doi-asserted-by":"crossref","unstructured":"Wallace, B.C., Small, K., Brodley, C.E., Trikalinos, T.A.: Class imbalance, redux. In: International Conference on Data Mining (ICDM), pp. 754\u2013763. IEEE (2011)","key":"185_CR92","DOI":"10.1109\/ICDM.2011.33"},{"doi-asserted-by":"crossref","unstructured":"Weinberger, K., Dasgupta, A., Langford, J., Smola, A., Attenberg, J.: Feature hashing for large scale multitask learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, pp. 1113\u20131120. ACM (2009)","key":"185_CR93","DOI":"10.1145\/1553374.1553516"},{"issue":"7","key":"185_CR94","doi-asserted-by":"crossref","first-page":"1341","DOI":"10.1162\/neco.1996.8.7.1341","volume":"8","author":"DH Wolpert","year":"1996","unstructured":"Wolpert, D.H.: The lack of a priori distinctions between learning algorithms. Neural Comput. 8(7), 1341\u20131390 (1996)","journal-title":"Neural Comput."},{"issue":"1","key":"185_CR95","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1109\/TKDE.2013.109","volume":"26","author":"X Wu","year":"2014","unstructured":"Wu, X., Zhu, X., Wu, G.Q., Ding, W.: Data mining with big data. Trans. Knowl. Data Eng. 26(1), 97\u2013107 (2014)","journal-title":"Trans. Knowl. Data Eng."},{"issue":"4","key":"185_CR96","doi-asserted-by":"crossref","first-page":"597","DOI":"10.1142\/S0219622006002258","volume":"5","author":"Q Yang","year":"2006","unstructured":"Yang, Q., Wu, X.: 10 challenging problems in data mining research. Int. J. Inf. Technol. Decis. Mak. 5(4), 597\u2013604 (2006)","journal-title":"Int. J. Inf. Technol. Decis. Mak."},{"unstructured":"Yu, H.F., Lo, H.Y., Hsieh, H.P., Lou, J.K., McKenzie, T.G., Chou, J.W., Chung, P.H., Ho, C.H., Chang, C.F., Wei, Y.H., et\u00a0al.: Feature engineering and classifier ensemble for KDD Cup 2010. In: International Conference on Knowledge Discovery and Data Mining KDD Cup 2010 Workshop (SIGKDD). ACM (2010)","key":"185_CR97"},{"issue":"1","key":"185_CR98","first-page":"49","volume":"16","author":"J Zhu","year":"2003","unstructured":"Zhu, J., Rosset, S., Hastie, T., Tibshirani, R.: 1-Norm support vector machines. Adv. Neural Inf. Process. Syst. (NIPS) 16(1), 49\u201356 (2003)","journal-title":"Adv. Neural Inf. Process. Syst. (NIPS)"},{"doi-asserted-by":"crossref","unstructured":"Ziegler, C.N., McNee, S.M., Konstan, J.A., Lausen, G.: Improving recommendation lists through topic diversification. In: International Conference on World Wide Web (WWW), pp. 22\u201332. ACM (2005)","key":"185_CR99","DOI":"10.1145\/1060745.1060754"},{"key":"185_CR100","volume-title":"Human Behavior and the Principle of Least Effort: An Introduction to Human Ecology","author":"GK Zipf","year":"2016","unstructured":"Zipf, G.K.: Human Behavior and the Principle of Least Effort: An Introduction to Human Ecology. Ravenio Books, New York (2016)"}],"container-title":["International Journal of Data Science and Analytics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-019-00185-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s41060-019-00185-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-019-00185-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,18]],"date-time":"2020-04-18T23:25:34Z","timestamp":1587252334000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s41060-019-00185-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,4,20]]},"references-count":100,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2020,3]]}},"alternative-id":["185"],"URL":"https:\/\/doi.org\/10.1007\/s41060-019-00185-1","relation":{},"ISSN":["2364-415X","2364-4168"],"issn-type":[{"type":"print","value":"2364-415X"},{"type":"electronic","value":"2364-4168"}],"subject":[],"published":{"date-parts":[[2019,4,20]]},"assertion":[{"value":"8 January 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 April 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 April 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}