{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T22:27:08Z","timestamp":1740176828858,"version":"3.37.3"},"reference-count":75,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,10,12]],"date-time":"2022-10-12T00:00:00Z","timestamp":1665532800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,10,12]],"date-time":"2022-10-12T00:00:00Z","timestamp":1665532800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Data Sci Anal"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s41060-022-00363-8","type":"journal-article","created":{"date-parts":[[2022,10,12]],"date-time":"2022-10-12T06:02:45Z","timestamp":1665554565000},"page":"389-409","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["TOMBoost: a topic modeling based boosting approach for learning with class imbalance"],"prefix":"10.1007","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5769-2405","authenticated-orcid":false,"given":"Sudarsun","family":"Santhiappan","sequence":"first","affiliation":[]},{"given":"Jeshuren","family":"Chelladurai","sequence":"additional","affiliation":[]},{"given":"Balaraman","family":"Ravindran","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,12]]},"reference":[{"issue":"2\u20133","key":"363_CR1","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1023\/A:1007452223027","volume":"30","author":"M Kubat","year":"1998","unstructured":"Kubat, M., Holte, R.C., Matwin, S.: Machine learning for the detection of oil spills in satellite radar images. Mach. Learn. 30(2\u20133), 195\u2013215 (1998)","journal-title":"Mach. Learn."},{"issue":"2","key":"363_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2907070","volume":"49","author":"P Branco","year":"2016","unstructured":"Branco, P., Torgo, L., Ribeiro, R.P.: A survey of predictive modeling on imbalanced domains. ACM Comput. Surv. 49(2), 1\u201350 (2016)","journal-title":"ACM Comput. Surv."},{"key":"363_CR3","doi-asserted-by":"publisher","first-page":"220","DOI":"10.1016\/j.eswa.2016.12.035","volume":"73","author":"G Haixiang","year":"2017","unstructured":"Haixiang, G., et al.: Learning from class-imbalanced data: review of methods and applications. Expert Syst. Appl. 73, 220\u2013239 (2017). https:\/\/doi.org\/10.1016\/j.eswa.2016.12.035","journal-title":"Expert Syst. Appl."},{"issue":"1","key":"363_CR4","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1186\/s40537-018-0151-6","volume":"5","author":"JL Leevy","year":"2018","unstructured":"Leevy, J.L., Khoshgoftaar, T.M., Bauder, R.A., Seliya, N.: A survey on addressing high-class imbalance in big data. J. Big Data 5(1), 42 (2018). https:\/\/doi.org\/10.1186\/s40537-018-0151-6","journal-title":"J. Big Data"},{"key":"363_CR5","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-019-0192-5","author":"JM Johnson","year":"2019","unstructured":"Johnson, J.M., Khoshgoftaar, T.M.: Survey on deep learning with class imbalance. J. Big Data (2019). https:\/\/doi.org\/10.1186\/s40537-019-0192-5","journal-title":"J. Big Data"},{"key":"363_CR6","first-page":"409","volume":"8","author":"D Mease","year":"2007","unstructured":"Mease, D., Wyner, A., Buja, A.: Boosted classification trees and class probability\/quantile estimation. J. Mach. Learn. Res. 8, 409\u2013439 (2007)","journal-title":"J. Mach. Learn. Res."},{"key":"363_CR7","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1016\/j.ins.2013.07.007","volume":"250","author":"V Lopez","year":"2013","unstructured":"Lopez, V., Fernandez, A., Garc\u00eda, S., Palade, V., Herrera, F.: An insight into classification with imbalanced data: empirical results and current trends on using data intrinsic characteristics. Inf. Sci. 250, 113\u2013141 (2013). https:\/\/doi.org\/10.1016\/j.ins.2013.07.007","journal-title":"Inf. Sci."},{"issue":"7","key":"363_CR8","doi-asserted-by":"publisher","first-page":"6585","DOI":"10.1016\/j.eswa.2011.12.043","volume":"39","author":"V Lopez","year":"2012","unstructured":"Lopez, V., Fernandez, A., Moreno-Torres, J.G., Herrera, F.: Analysis of preprocessing vs. cost-sensitive learning for imbalanced classification. Open problems on intrinsic data characteristics. Expert Syst. Appl. 39(7), 6585\u20136608 (2012). https:\/\/doi.org\/10.1016\/j.eswa.2011.12.043","journal-title":"Expert Syst. Appl."},{"key":"363_CR9","doi-asserted-by":"crossref","unstructured":"He, H., Ma, Y.: Imbalanced Learning: Foundations, Algorithms, and Applications, 1st edn. Wiley-IEEE Press (2013)","DOI":"10.1002\/9781118646106.ch1"},{"key":"363_CR10","doi-asserted-by":"publisher","first-page":"220","DOI":"10.1016\/j.eswa.2016.12.035","volume":"73","author":"H Guo","year":"2017","unstructured":"Guo, H., et al.: Learning from class-imbalanced data: review of methods and applications. Expert Syst. Appl. 73, 220\u2013239 (2017)","journal-title":"Expert Syst. Appl."},{"key":"363_CR11","doi-asserted-by":"crossref","unstructured":"Agrawal, A., Viktor, H.L., Paquet, E., Fred, A.L.N., Dietz, J.L.G., Aveiro, D., Liu, K., Filipe, J.: SCUT: multi-class imbalanced data classification using SMOTE and cluster-based undersampling. In: Fred, A.L.N., Dietz, J.L.G., Aveiro, D., Liu, K., Filipe, J. (eds.) KDIR, pp. 226\u2013234. SciTePress (2015)","DOI":"10.5220\/0005595502260234"},{"key":"363_CR12","doi-asserted-by":"crossref","unstructured":"Hofmann, T.: Probabilistic Latent Semantic Analysis, pp. 289\u2013296. Morgan Kaufmann Publishers Inc. (1999)","DOI":"10.1145\/312624.312649"},{"key":"363_CR13","doi-asserted-by":"crossref","unstructured":"Kim, Y.-M., Pessiot, J.-F., Amini, M.-R., Gallinari, P., Shanahan, J.G. et\u00a0al.: An extension of PLSA for document clustering. In: Shanahan, J.G. et\u00a0al. (eds.) CIKM, pp. 1345\u20131346. ACM (2008). http:\/\/dblp.uni-trier.de\/db\/conf\/cikm\/cikm2008.html#KimPAG08","DOI":"10.1145\/1458082.1458271"},{"key":"363_CR14","unstructured":"Wang, L., Li, X., Tu, Z., Jia, J.: Discriminative clustering via generative feature mapping, pp. 1\u20137 (2012). https:\/\/www.aaai.org\/ocs\/index.php\/AAAI\/AAAI12\/paper\/view\/5034"},{"issue":"5","key":"363_CR15","doi-asserted-by":"publisher","first-page":"1762","DOI":"10.1109\/TITS.2018.2834958","volume":"20","author":"KK Santhosh","year":"2019","unstructured":"Santhosh, K.K., Dogra, D.P., Roy, P.P.: Temporal unknown incremental clustering model for analysis of traffic surveillance videos. IEEE Trans. Intell. Transp. Syst. 20(5), 1762\u20131773 (2019). https:\/\/doi.org\/10.1109\/TITS.2018.2834958","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"363_CR16","unstructured":"Griffiths, A.J., Gelbart, W.M., Lewontin, R.C., Miller, J.H.: Modern Genetic Analysis: Integrating Genes and Genomes, vol. 1. Macmillan (2002)"},{"key":"363_CR17","doi-asserted-by":"publisher","first-page":"945","DOI":"10.1093\/genetics\/155.2.945","volume":"155","author":"JK Pritchard","year":"2000","unstructured":"Pritchard, J.K., Stephens, M., Donnelly, P.: Inference of population structure using multilocus genotype data. Genetics 155, 945\u2013959 (2000)","journal-title":"Genetics"},{"issue":"8","key":"363_CR18","doi-asserted-by":"publisher","first-page":"4148","DOI":"10.1109\/TCYB.2019.2931139","volume":"51","author":"KK Santhosh","year":"2021","unstructured":"Santhosh, K.K., Dogra, D.P., Roy, P.P., Chaudhuri, B.B.: Trajectory-based scene understanding using Dirichlet process mixture model. IEEE Trans. Cybern. 51(8), 4148\u20134161 (2021). https:\/\/doi.org\/10.1109\/TCYB.2019.2931139","journal-title":"IEEE Trans. Cybern."},{"key":"363_CR19","doi-asserted-by":"crossref","unstructured":"Kennedy, T.F., et\u00a0al.: Topic Models for RFID Data Modeling and Localization, pp. 1438\u20131446 (2017)","DOI":"10.1109\/BigData.2017.8258077"},{"key":"363_CR20","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3161623","author":"X Chen","year":"2022","unstructured":"Chen, X., Huang, K., Jiang, H.: Detecting changes in the spatiotemporal pattern of bike sharing: a change-point topic model. IEEE Trans. Intell. Transp. Syst. (2022). https:\/\/doi.org\/10.1109\/TITS.2022.3161623","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"1","key":"363_CR21","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1006\/jcss.1997.1504","volume":"55","author":"Y Freund","year":"1997","unstructured":"Freund, Y., Schapire, R.E.: A decision-theoretic generalization of on-line learning and an application to boosting. J. Comput. Syst. Sci. 55(1), 119\u2013139 (1997). https:\/\/doi.org\/10.1006\/jcss.1997.1504","journal-title":"J. Comput. Syst. Sci."},{"key":"363_CR22","doi-asserted-by":"crossref","unstructured":"Sun, Y., Kamel, M.S., Wang, Y.: Boosting for learning multiple classes with imbalanced class distribution, pp. 592\u2013602. IEEE Computer Society (2006). http:\/\/dblp.uni-trier.de\/db\/conf\/icdm\/icdm2006.html#SunKW06","DOI":"10.1109\/ICDM.2006.29"},{"key":"363_CR23","doi-asserted-by":"crossref","unstructured":"Schapire, R.E.: Boosting: Foundations and Algorithms (2013)","DOI":"10.7551\/mitpress\/8291.001.0001"},{"key":"363_CR24","first-page":"993","volume":"3","author":"D Blei","year":"2003","unstructured":"Blei, D., Ng, A., Jordan, M.: Latent Dirichlet allocation. J. Mach. Learn. Res. 3, 993\u20131022 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"363_CR25","unstructured":"Drummond, C., Holte, R.: C4.5, class imbalance, and cost sensitivity: why under-sampling beats over-sampling, pp. 1\u20138 (2003). http:\/\/citeseerx.ist.psu.edu\/viewdoc\/download?doi=10.1.1.68.6858& rep=rep1& type=pdf"},{"key":"363_CR26","unstructured":"Holte, R.C., Acker, L., Porter, B.W., Sridharan, N.S. Concept learning and the problem of small disjuncts. In: Sridharan, N.S. (ed.) IJCAI, pp. 813\u2013818. Morgan Kaufmann (1989). http:\/\/dblp.uni-trier.de\/db\/conf\/ijcai\/ijcai89.html#HolteAP89"},{"key":"363_CR27","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"N Chawla","year":"2002","unstructured":"Chawla, N., Bowyer, K., Hall, L., Kegelmeyer, W.: SMOTE: synthetic minority over-sampling technique. J. Artif. Intell. Res. 16, 321\u2013357 (2002)","journal-title":"J. Artif. Intell. Res."},{"key":"363_CR28","doi-asserted-by":"crossref","unstructured":"Han, H., Wang, W. & Mao, B. Huang, D.-S., Zhang, X.-P., Huang, G.-B. Borderline-SMOTE: a new over-sampling method in imbalanced data sets learning. In: Huang, D.-S., Zhang, X.-P., Huang, G.-B. (eds.) ICIC (1), Lecture Notes in Computer Science, vol. 3644, pp. 878\u2013887. Springer (2005). http:\/\/dblp.uni-trier.de\/db\/conf\/icic\/icic2005-1.html#HanWM05","DOI":"10.1007\/11538059_91"},{"issue":"2","key":"363_CR29","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1109\/TKDE.2012.232","volume":"26","author":"S Barua","year":"2014","unstructured":"Barua, S., Islam, M.M., Yao, X., Murase, K.: MWMOTE-majority weighted minority oversampling technique for imbalanced data set learning. IEEE Trans. Knowl. Data Eng. 26(2), 405\u2013425 (2014)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"2","key":"363_CR30","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1016\/j.ipm.2010.07.003","volume":"47","author":"E Chen","year":"2011","unstructured":"Chen, E., Lin, Y., Xiong, H., Luo, Q., Ma, H.: Exploiting probabilistic topic models to improve text categorization under class imbalance. Inf. Process. Manag. 47(2), 202\u2013214 (2011). https:\/\/doi.org\/10.1016\/j.ipm.2010.07.003","journal-title":"Inf. Process. Manag."},{"key":"363_CR31","doi-asserted-by":"publisher","unstructured":"Barredo Arrieta, A., et\u00a0al.: Explainable artificial intelligence (XAI): concepts, taxonomies, opportunities and challenges toward responsible AI. Inf. Fus. (2020). https:\/\/doi.org\/10.1016\/j.inffus.2019.12.012, arXiv:1910.10045","DOI":"10.1016\/j.inffus.2019.12.012"},{"issue":"3","key":"363_CR32","doi-asserted-by":"publisher","first-page":"605","DOI":"10.1007\/s10994-017-5670-4","volume":"107","author":"C Bellinger","year":"2018","unstructured":"Bellinger, C., Drummond, C., Japkowicz, N.: Manifold-based synthetic oversampling with manifold conformance estimation. Mach. Learn. 107(3), 605\u2013637 (2018). https:\/\/doi.org\/10.1007\/s10994-017-5670-4","journal-title":"Mach. Learn."},{"key":"363_CR33","doi-asserted-by":"publisher","unstructured":"Santhiappan, S., Chelladurai, J., Ravindran, B.: A novel topic modeling based weighting framework for class imbalance learning. In: CoDS-COMAD\u201918, pp. 20\u201329. ACM, New York (2018). https:\/\/doi.org\/10.1145\/3152494.3152496","DOI":"10.1145\/3152494.3152496"},{"key":"363_CR34","unstructured":"Peng, Y. Bonet, B., Koenig, S.: Adaptive sampling with optimal cost for class-imbalance learning. In: Bonet, B. & Koenig, S. (eds.) AAAI, pp. 2921\u20132927. AAAI Press (2015). http:\/\/dblp.uni-trier.de\/db\/conf\/aaai\/aaai2015.html#Peng15"},{"key":"363_CR35","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1016\/j.eswa.2015.10.031","volume":"46","author":"I Nekooeimehr","year":"2016","unstructured":"Nekooeimehr, I., Lai-Yuen, S.K.: Adaptive semi-unsupervised weighted oversampling (A-SUWO) for imbalanced datasets. Expert Syst. Appl. 46, 405\u2013416 (2016)","journal-title":"Expert Syst. Appl."},{"key":"363_CR36","doi-asserted-by":"crossref","unstructured":"Mustafa, G., Niu, Z., Yousif, A., Tarus, J.: Distribution based ensemble for class imbalance learning, pp. 5\u201310 (2015)","DOI":"10.1109\/INTECH.2015.7173365"},{"issue":"4","key":"363_CR37","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1109\/TSMCC.2011.2161285","volume":"42","author":"M Galar","year":"2012","unstructured":"Galar, M., Fernandez, A., Barrenechea, E., Bustince, H., Herrera, F.: A review on ensembles for the class imbalance problem: bagging-, boosting-, and hybrid-based approaches. IEEE Trans. Syst. Man Cybern. Part C Appl. Rev. 42(4), 463\u2013484 (2012). https:\/\/doi.org\/10.1109\/TSMCC.2011.2161285","journal-title":"IEEE Trans. Syst. Man Cybern. Part C Appl. Rev."},{"key":"363_CR38","doi-asserted-by":"crossref","unstructured":"Chawla, N.V., Lazarevic, A., Hall, L.O., Bowyer, K.W., Lavrac, N., Gamberger, D., Blockeel, H., Todorovski, L.: SMOTEBoost: improving prediction of the minority class in boosting. In: Lavrac, N., Gamberger, D., Blockeel, H., Todorovski, L. (eds.) PKDD, Lecture Notes in Computer Science, vol. 2838, pp. 107\u2013119. Springer (2003). http:\/\/dblp.uni-trier.de\/db\/conf\/pkdd\/pkdd2003.html#ChawlaLHB03","DOI":"10.1007\/978-3-540-39804-2_12"},{"issue":"1","key":"363_CR39","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1145\/1007730.1007736","volume":"6","author":"H Guo","year":"2004","unstructured":"Guo, H., Viktor, H.L.: Learning from imbalanced data sets with boosting and data generation: the DataBoost-IM approach. SIGKDD Explor. 6(1), 30\u201339 (2004). https:\/\/doi.org\/10.1145\/1007730.1007736","journal-title":"SIGKDD Explor."},{"issue":"1","key":"363_CR40","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1109\/TSMCA.2009.2029559","volume":"40","author":"C Seiffert","year":"2010","unstructured":"Seiffert, C., Khoshgoftaar, T.M., Hulse, J.V., Napolitano, A.: RUSBoost: a hybrid approach to alleviating class imbalance. IEEE Trans. Syst. Man Cybern. Part A 40(1), 185\u2013197 (2010)","journal-title":"IEEE Trans. Syst. Man Cybern. Part A"},{"key":"363_CR41","doi-asserted-by":"crossref","unstructured":"Rayhan, F. et\u00a0al. Cusboost: cluster-based under-sampling with boosting for imbalanced classification. CoRR (2017). arXiv:1712.04356","DOI":"10.1109\/CSITSS.2017.8447534"},{"key":"363_CR42","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/j.ins.2017.05.008","volume":"409\u2013410","author":"W-C Lin","year":"2017","unstructured":"Lin, W.-C., Tsai, C.-F., Hu, Y.-H., Jhang, J.-S.: Clustering-based undersampling in class-imbalanced data. Inf. Sci. 409\u2013410, 17\u201326 (2017). https:\/\/doi.org\/10.1016\/j.ins.2017.05.008","journal-title":"Inf. Sci."},{"key":"363_CR43","doi-asserted-by":"crossref","unstructured":"Lingchi, C., Xiaoheng, D., Hailan, S., Congxu, Z., Le, C.: Dycusboost: Adaboost-based imbalanced learning using dynamic clustering and undersampling, pp. 208\u2013215 (2018)","DOI":"10.1109\/DASC\/PiCom\/DataCom\/CyberSciTec.2018.00045"},{"issue":"11","key":"363_CR44","first-page":"1403","volume":"35","author":"J-F Ge","year":"2009","unstructured":"Ge, J.-F., Luo, Y.-P.: A comprehensive study for asymmetric AdaBoost and its application in object detection. Acta Automatica Sinica 35(11), 1403\u20131409 (2009)","journal-title":"Acta Automatica Sinica"},{"key":"363_CR45","unstructured":"Fan, W., Stolfo, S.J., Zhang, J., Chan, P.K., Bratko, I., Dzeroski, S.: AdaCost: misclassification cost-sensitive boosting. In: Bratko, I., Dzeroski, S. (eds.) ICML, pp. 97\u2013105. Morgan Kaufmann (1999). http:\/\/dblp.uni-trier.de\/db\/conf\/icml\/icml1999.html#FanSZC99"},{"key":"363_CR46","unstructured":"Domingos, P.M. Fayyad, U.M., Chaudhuri, S., Madigan, D.: MetaCost: a general method for making classifiers cost-sensitive. In: Fayyad, U.M., Chaudhuri, S., Madigan, D. (eds.) KDD, pp. 155\u2013164. ACM (1999). http:\/\/dblp.uni-trier.de\/db\/conf\/kdd\/kdd99.html#Domingos99"},{"key":"363_CR47","unstructured":"Zadrozny, B., Langford, J., Abe, N.: Cost-sensitive learning by cost-proportionate example weighting, p. 435. IEEE Computer Society (2003). http:\/\/dblp.uni-trier.de\/db\/conf\/icdm\/icdm2003.html#ZadroznyLA03"},{"key":"363_CR48","doi-asserted-by":"crossref","unstructured":"Yang, Y., Xiao, P., Cheng, Y., Liu, W., Huang, Z.: Ensemble strategy for hard classifying samples in class-imbalanced data set, pp. 170\u2013175 (2018)","DOI":"10.1109\/BigComp.2018.00033"},{"key":"363_CR49","doi-asserted-by":"crossref","unstructured":"Chen, T., Guestrin, C.: Xgboost: a scalable tree boosting system. CoRR (2016). arXiv:1603.02754","DOI":"10.1145\/2939672.2939785"},{"key":"363_CR50","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csda.2017.01.005","volume":"111","author":"J Gong","year":"2017","unstructured":"Gong, J., Kim, H.: Rhsboost: improving classification performance in imbalance data. Comput. Stat. Data Anal. 111, 1\u201313 (2017). https:\/\/doi.org\/10.1016\/j.csda.2017.01.005","journal-title":"Comput. Stat. Data Anal."},{"issue":"1","key":"363_CR51","doi-asserted-by":"publisher","first-page":"82","DOI":"10.32614\/RJ-2014-008","volume":"6","author":"N Lunardon","year":"2014","unstructured":"Lunardon, N., Menardi, G., Torelli, N.: ROSE: a package for binary imbalanced learning. R J. 6(1), 82\u201392 (2014)","journal-title":"R J."},{"key":"363_CR52","doi-asserted-by":"publisher","first-page":"272","DOI":"10.1016\/j.jss.2017.07.006","volume":"132","author":"W Lu","year":"2017","unstructured":"Lu, W., Li, Z., Chu, J.: Adaptive ensemble undersampling-boost: a novel learning framework for imbalanced data. J. Syst. Softw. 132, 272\u2013282 (2017). https:\/\/doi.org\/10.1016\/j.jss.2017.07.006","journal-title":"J. Syst. Softw."},{"key":"363_CR53","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1016\/j.ins.2018.10.029","volume":"477","author":"C-F Tsai","year":"2019","unstructured":"Tsai, C.-F., Lin, W.-C., Hu, Y.-H., Yao, G.-T.: Under-sampling class imbalanced datasets by combining clustering analysis and instance selection. Inf. Sci. 477, 47\u201354 (2019). https:\/\/doi.org\/10.1016\/j.ins.2018.10.029","journal-title":"Inf. Sci."},{"key":"363_CR54","doi-asserted-by":"publisher","unstructured":"Sun, L., Song, J., Hua, C., Shen, C., Song, M.: Value-aware resampling and loss for imbalanced classification. In: CSAE\u201918, pp. 1\u20136. ACM, New York (2018). https:\/\/doi.org\/10.1145\/3207677.3278084","DOI":"10.1145\/3207677.3278084"},{"key":"363_CR55","unstructured":"Hofmann, T.: Unsupervised Learning from Dyadic Data, pp. 466\u2013472. MIT Press (1998)"},{"key":"363_CR56","unstructured":"Sakai, Y., Iwata, K.: Extremal relations between Shannon entropy and $$\\ell \\alpha $$-norm, pp. 428\u2013432 (2016)"},{"key":"363_CR57","doi-asserted-by":"crossref","unstructured":"Blei, D.M.: Introduction to probabilistic topic models. Commun. ACM (2011). http:\/\/www.cs.princeton.edu\/~blei\/papers\/Blei2011.pdf","DOI":"10.1145\/2107736.2107741"},{"key":"363_CR58","first-page":"63","volume":"13","author":"H Xiao","year":"2010","unstructured":"Xiao, H., Stibor, T.: Efficient collapsed Gibbs sampling for latent Dirichlet allocation. J. Mach. Learn. Res. Proc. Track 13, 63\u201378 (2010)","journal-title":"J. Mach. Learn. Res. Proc. Track"},{"key":"363_CR59","unstructured":"Phan, X.-H., Nguyen, C.-T.: gibbslda (2008). http:\/\/gibbslda.sourceforge.net\/"},{"key":"363_CR60","unstructured":"Blei, D.M.: lda-c (2003). http:\/\/www.cs.princeton.edu\/~blei\/lda-c\/"},{"key":"363_CR61","unstructured":"Le\u00e3es, A., Fernandes, P., Lopes, L., Assun\u00e7\u00e3o, J.: Classifying with adaboost.m1: the training error threshold myth, pp. 1\u20137 (2017). https:\/\/aaai.org\/ocs\/index.php\/FLAIRS\/FLAIRS17\/paper\/view\/15498"},{"key":"363_CR62","unstructured":"He, H., Bai, Y., Garcia, E.A., Li, S.: ADASYN: adaptive synthetic sampling approach for imbalanced learning, pp. 1322\u20131328. IEEE (2008). http:\/\/dblp.uni-trier.de\/db\/conf\/ijcnn\/ijcnn2008.html#HeBGL08"},{"key":"363_CR63","doi-asserted-by":"publisher","first-page":"5718","DOI":"10.1016\/j.eswa.2008.06.108","volume":"36","author":"S-J Yen","year":"2006","unstructured":"Yen, S.-J., Lee, Y.-S.: Cluster-based under-sampling approaches for imbalanced data distributions. Expert Syst. Appl. 36, 5718\u20135727 (2006). https:\/\/doi.org\/10.1016\/j.eswa.2008.06.108","journal-title":"Expert Syst. Appl."},{"key":"363_CR64","doi-asserted-by":"publisher","first-page":"515","DOI":"10.1109\/TIT.1968.1054155","volume":"14","author":"PE Hart","year":"1968","unstructured":"Hart, P.E.: The condensed nearest neighbor rule. IEEE Trans. Inf. Theory 14, 515\u2013516 (1968)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"2","key":"363_CR65","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1007\/s10994-013-5422-z","volume":"95","author":"MR Smith","year":"2014","unstructured":"Smith, M.R., Martinez, T., Giraud-Carrier, C.: An instance level analysis of data complexity. Mach. Learn. 95(2), 225\u2013256 (2014). https:\/\/doi.org\/10.1007\/s10994-013-5422-z","journal-title":"Mach. Learn."},{"key":"363_CR66","unstructured":"Last, F., Douzas, G., Ba\u00e7\u00e3o, F.: Oversampling for imbalanced learning based on k-means and SMOTE. CoRR (2017). arXiv:1711.00837"},{"key":"363_CR67","unstructured":"Zhang, J., Mani, I.: KNN Approach to Unbalanced Data Distributions: A Case Study Involving Information Extraction, pp. 1\u20137 (2003)"},{"key":"363_CR68","unstructured":"Kubat, M.: Addressing the curse of imbalanced training sets: one-sided selection. In: Fourteenth International Conference on Machine Learning (2000)"},{"key":"363_CR69","unstructured":"Batista, G., Bazzan, A., Monard, M.-C.: Balancing training data for automated annotation of keywords: a case study, pp. 10\u201318 (2003)"},{"issue":"2","key":"363_CR70","first-page":"679","volume":"7","author":"I Tomek","year":"1976","unstructured":"Tomek, I.: Two modifications of CNN. IEEE Trans. Syst. Man Cybern. 7(2), 679\u2013772 (1976)","journal-title":"IEEE Trans. Syst. Man Cybern."},{"issue":"Suppl 13","key":"363_CR71","doi-asserted-by":"publisher","first-page":"S8","DOI":"10.1186\/1471-2105-16-S13-S8","volume":"16 Suppl 13","author":"W Zhao","year":"2015","unstructured":"Zhao, W., et al.: A heuristic approach to determine an appropriate number of topics in topic modeling. BMC Bioinform. 16 Suppl 13(Suppl 13), S8\u2013S8 (2015). https:\/\/doi.org\/10.1186\/1471-2105-16-S13-S8","journal-title":"BMC Bioinform."},{"key":"363_CR72","doi-asserted-by":"crossref","unstructured":"Terragni, S., Fersini, E., Galuzzi, B.\u00a0G., Tropeano, P., Candelieri, A.: OCTIS: comparing and optimizing topic models is simple!, pp. 263\u2013270. Association for Computational Linguistics, Online (2021). https:\/\/aclanthology.org\/2021.eacl-demos.31","DOI":"10.18653\/v1\/2021.eacl-demos.31"},{"key":"363_CR73","doi-asserted-by":"crossref","unstructured":"Terragni, S., Fersini, E.: Fersini, E., Passarotti, M., Patti, V.: OCTIS 2.0: optimizing and comparing topic models in Italian is even simpler!. In: Fersini, E., Passarotti, M., Patti, V. (eds.) Proceedings of the Eighth Italian Conference on Computational Linguistics, CLiC-it 2021, Milan, Italy, January 26\u201328, 2022, CEUR Workshop Proceedings, vol. 3033. CEUR-WS.org (2021). http:\/\/ceur-ws.org\/Vol-3033\/paper55.pdf","DOI":"10.4000\/books.aaccademia.10863"},{"key":"363_CR74","unstructured":"Lichman, M.: UCI Machine Learning Repository (2013). http:\/\/archive.ics.uci.edu\/ml"},{"key":"363_CR75","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1186\/1471-2105-14-106","volume":"14","author":"R Blagus","year":"2013","unstructured":"Blagus, R., Lusa, L.: SMOTE for high-dimensional class-imbalanced data. BMC Bioinform. 14, 106 (2013)","journal-title":"BMC Bioinform."}],"container-title":["International Journal of Data Science and Analytics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-022-00363-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s41060-022-00363-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-022-00363-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,10]],"date-time":"2024-05-10T06:30:00Z","timestamp":1715322600000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s41060-022-00363-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,12]]},"references-count":75,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["363"],"URL":"https:\/\/doi.org\/10.1007\/s41060-022-00363-8","relation":{},"ISSN":["2364-415X","2364-4168"],"issn-type":[{"type":"print","value":"2364-415X"},{"type":"electronic","value":"2364-4168"}],"subject":[],"published":{"date-parts":[[2022,10,12]]},"assertion":[{"value":"30 December 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 September 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 October 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}