{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T23:20:26Z","timestamp":1777504826840,"version":"3.51.4"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T00:00:00Z","timestamp":1746403200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T00:00:00Z","timestamp":1746403200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Big Data"],"DOI":"10.1186\/s40537-025-01154-1","type":"journal-article","created":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T06:16:08Z","timestamp":1746425768000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Unsupervised feature selection and class labeling for credit card fraud"],"prefix":"10.1186","volume":"12","author":[{"given":"Robert K. L.","family":"Kennedy","sequence":"first","affiliation":[]},{"given":"Flavio","family":"Villanustre","sequence":"additional","affiliation":[]},{"given":"Taghi M.","family":"Khoshgoftaar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,5]]},"reference":[{"key":"1154_CR1","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L.-J, Li K, Fei-Fei L. Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009). Ieee","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"3","key":"1154_CR2","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/3446776","volume":"64","author":"C Zhang","year":"2021","unstructured":"Zhang C, Bengio S, Hardt M, Recht B, Vinyals O. Understanding deep learning (still) requires rethinking generalization. Commun ACM. 2021;64(3):107\u201315.","journal-title":"Commun ACM"},{"issue":"2","key":"1154_CR3","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1109\/MIS.2009.36","volume":"24","author":"A Halevy","year":"2009","unstructured":"Halevy A, Norvig P, Pereira F. The unreasonable effectiveness of data. IEEE Intell Syst. 2009;24(2):8\u201312.","journal-title":"IEEE Intell Syst"},{"key":"1154_CR4","doi-asserted-by":"crossref","unstructured":"Sun C, Shrivastava A, Singh S, Gupta A. Revisiting unreasonable effectiveness of data in deep learning era. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 843\u2013852. 2017.","DOI":"10.1109\/ICCV.2017.97"},{"key":"1154_CR5","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.media.2017.07.005","volume":"42","author":"G Litjens","year":"2017","unstructured":"Litjens G, Kooi T, Bejnordi BE, Setio A, Ciompi F, Ghafoorian M, Van Der Laak JA, Van Ginneken B, S\u00e1nchez CI. A survey on deep learning in medical image analysis. Med Image Anal. 2017;42:60\u201388.","journal-title":"Med Image Anal"},{"issue":"3","key":"1154_CR6","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1111\/jori.12427","volume":"90","author":"J Debener","year":"2023","unstructured":"Debener J, Heinke V, Kriebel J. Detecting insurance fraud using supervised and unsupervised machine learning. J Risk Insurance. 2023;90(3):743\u201368.","journal-title":"J Risk Insurance"},{"key":"1154_CR7","unstructured":"Xie J, Girshick R, Farhadi A. Unsupervised deep embedding for clustering analysis. In: International Conference on Machine Learning, pp. 478\u2013487. 2016. PMLR."},{"key":"1154_CR8","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1023\/A:1022604100933","volume":"4","author":"J Mingers","year":"1989","unstructured":"Mingers J. An empirical comparison of pruning methods for decision tree induction. Mach Learn. 1989;4:227\u201343.","journal-title":"Mach Learn"},{"issue":"1","key":"1154_CR9","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R. Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res. 2014;15(1):1929\u201358.","journal-title":"J Mach Learn Res"},{"issue":"4","key":"1154_CR10","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1007\/s13748-016-0094-0","volume":"5","author":"B Krawczyk","year":"2016","unstructured":"Krawczyk B. Learning from imbalanced data: open challenges and future directions. Prog Artif Intell. 2016;5(4):221\u201332.","journal-title":"Prog Artif Intell"},{"key":"1154_CR11","doi-asserted-by":"crossref","unstructured":"Khoshgoftaar TM, Seiffert C, Van\u00a0Hulse J, Napolitano A, Folleco A. Learning with limited minority class data. In: Machine Learning and Applications, 2007. ICMLA 2007. Sixth International Conference On, pp. 348\u2013353. 2007. IEEE.","DOI":"10.1109\/ICMLA.2007.76"},{"issue":"1","key":"1154_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-024-00897-7","volume":"11","author":"RK Kennedy","year":"2024","unstructured":"Kennedy RK, Villanustre F, Khoshgoftaar TM, Salekshahrezaee Z. Synthesizing class labels for highly imbalanced credit card fraud detection data. J Big Data. 2024;11(1):1\u201322.","journal-title":"J Big Data"},{"key":"1154_CR13","unstructured":"Lundberg SM, Lee S-I. A unified approach to interpreting model predictions. In: Guyon I, Luxburg UV, Bengio S, Wallach H, Fergus R, Vishwanathan S, Garnett R. (eds.) Advances in Neural Information Processing Systems 30, pp. 4765\u20134774. Curran Associates, Inc., Red Hook, NY, USA (2017). http:\/\/papers.nips.cc\/paper\/7062-a-unified-approach-to-interpreting-model-predictions.pdf."},{"key":"1154_CR14","doi-asserted-by":"crossref","unstructured":"Dal\u00a0Pozzolo A, Caelen O, Johnson RA, Bontempi G. Calibrating probability with undersampling for unbalanced classification. In: 2015 IEEE Symposium Series on Computational Intelligence, pp. 159\u2013166. 2015. IEEE","DOI":"10.1109\/SSCI.2015.33"},{"key":"1154_CR15","unstructured":"Kaggle: Credit Card Fraud Detection. https:\/\/www.kaggle.com\/mlg-ulb\/creditcardfraud. 2018."},{"key":"1154_CR16","doi-asserted-by":"crossref","unstructured":"Liu FT, Ting KM, Zhou Z-H. Isolation forest. In: 2008 Eighth Ieee International Conference on Data Mining, pp. 413\u2013422. 2008. IEEE.","DOI":"10.1109\/ICDM.2008.17"},{"key":"1154_CR17","doi-asserted-by":"crossref","unstructured":"Leevy JL, Hancock J, Khoshgoftaar TM, Abdollah\u00a0Zadeh A. Investigating the effectiveness of one-class and binary classification for fraud detection. J Big Data. 2023.","DOI":"10.1186\/s40537-023-00825-1"},{"key":"1154_CR18","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-024-01041-1","author":"JT Hancock","year":"2024","unstructured":"Hancock JT, Khoshgoftaar TM, Liang Q. A problem-agnostic approach to feature selection and analysis using shap. J Big Data. 2024. https:\/\/doi.org\/10.1186\/s40537-024-01041-1.","journal-title":"J Big Data"},{"issue":"6","key":"1154_CR19","first-page":"1","volume":"10","author":"Z Salekshahrezaee","year":"2023","unstructured":"Salekshahrezaee Z, Leevy JL, Khoshgoftaar TM. The effect of feature extraction and data sampling on credit card fraud detection. J Big Data. 2023;10(6):1\u201317.","journal-title":"J Big Data"},{"key":"1154_CR20","doi-asserted-by":"crossref","unstructured":"Masci J, Meier U, Cire\u015fan D, Schmidhuber J. Stacked convolutional auto-encoders for hierarchical feature extraction. In: Artificial Neural Networks and Machine Learning\u2013ICANN 2011: 21st International Conference on Artificial Neural Networks, Espoo, Finland, June 14-17, 2011, Proceedings, Part I 21, pp. 52\u201359. 2011. Springer.","DOI":"10.1007\/978-3-642-21735-7_7"},{"issue":"6","key":"1154_CR21","doi-asserted-by":"publisher","first-page":"305","DOI":"10.3390\/systems11060305","volume":"11","author":"S Jiang","year":"2023","unstructured":"Jiang S, Dong R, Wang J, Xia M. Credit card fraud detection based on unsupervised attentional anomaly detection network. Systems. 2023;11(6):305.","journal-title":"Systems"},{"key":"1154_CR22","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y. Generative adversarial nets. Adv Neural Inf Proc Syst. 2014;27."},{"key":"1154_CR23","doi-asserted-by":"crossref","unstructured":"Rezapour M. Anomaly detection using unsupervised methods: credit card fraud case study. Int J Adv Comput Sci Appl. 2019;10(11).","DOI":"10.14569\/IJACSA.2019.0101101"},{"key":"1154_CR24","doi-asserted-by":"crossref","unstructured":"Sch\u00f6lkopf B, Williamson R.C, Smola A, Shawe-Taylor J, Platt J. Support vector method for novelty detection. Adv Neural Inf Proc Syst. 1999;12.","DOI":"10.1162\/089976600300015565"},{"key":"1154_CR25","unstructured":"Ng A, et al. Sparse autoencoder. CS294A Lecture notes. 2011;72(2011): 1\u201319."},{"issue":"5","key":"1154_CR26","first-page":"2621","volume":"27","author":"F Moslehi","year":"2020","unstructured":"Moslehi F, Haeri A, Gholamian MR. A novel selective clustering framework for appropriate labeling of clusters based on k-means algorithm. Scientia Iranica. 2020;27(5):2621\u201334.","journal-title":"Scientia Iranica"},{"key":"1154_CR27","doi-asserted-by":"crossref","unstructured":"Rauber A. Labelsom: on the labeling of self-organizing maps. In: IJCNN\u201999. International Joint Conference on Neural Networks. Proceedings (Cat. No. 99CH36339), vol. 5, pp. 3527\u20133532. 1999. IEEE.","DOI":"10.1109\/IJCNN.1999.836235"},{"issue":"9","key":"1154_CR28","doi-asserted-by":"publisher","first-page":"1464","DOI":"10.1109\/5.58325","volume":"78","author":"T Kohonen","year":"1990","unstructured":"Kohonen T. The self-organizing map. Proc IEEE. 1990;78(9):1464\u201380.","journal-title":"Proc IEEE"},{"issue":"11","key":"1154_CR29","doi-asserted-by":"publisher","first-page":"1632","DOI":"10.1016\/j.jss.2006.03.013","volume":"79","author":"O Maqbool","year":"2006","unstructured":"Maqbool O, Babri HA. Automated software clustering: an insight using cluster labels. J Syst Softw. 2006;79(11):1632\u201348.","journal-title":"J Syst Softw"},{"key":"1154_CR30","doi-asserted-by":"crossref","unstructured":"Kennedy RK, Salekshahrezaee Z, Khoshgoftaar TM. A novel approach for unsupervised learning of highly-imbalanced data. In: 2022 IEEE 4th International Conference on Cognitive Machine Intelligence (CogMI), pp. 52\u201358. 2022. IEEE.","DOI":"10.1109\/CogMI56440.2022.00018"},{"key":"1154_CR31","doi-asserted-by":"crossref","unstructured":"Wan Z, Zhang Y, He H. Variational autoencoder based synthetic data generation for imbalanced learning. In: 2017 IEEE Symposium Series on Computational Intelligence (SSCI), pp. 1\u20137. 2017. IEEE.","DOI":"10.1109\/SSCI.2017.8285168"},{"key":"1154_CR32","doi-asserted-by":"crossref","unstructured":"Kennedy RK, Salekshahrezaee Z, Khoshgoftaar TM. Unsupervised anomaly detection of class imbalanced cognition data using an iterative cleaning method. In: 2023 IEEE 24th International Conference on Information Reuse and Integration for Data Science (IRI), pp. 303\u2013308. 2023. IEEE.","DOI":"10.1109\/IRI58017.2023.00060"},{"key":"1154_CR33","unstructured":"Chollet F, et al. Keras. https:\/\/keras.io. 2015."},{"key":"1154_CR34","doi-asserted-by":"crossref","unstructured":"Leevy JL, Khoshgoftaar TM, Hancock J. Evaluating performance metrics for credit card fraud classification. In: 2022 IEEE 34th International Conference on Tools with Artificial Intelligence (ICTAI), pp. 1336\u20131341. 2022. IEEE.","DOI":"10.1109\/ICTAI56018.2022.00202"},{"key":"1154_CR35","unstructured":"Provost FJ, Fawcett T, et al. Analysis and visualization of classifier performance: comparison under imprecise class and cost distributions. In: KDD, vol. 97, pp. 43\u201348. 1997."},{"key":"1154_CR36","doi-asserted-by":"crossref","unstructured":"Davis J, Goadrich M. The relationship between precision-recall and roc curves. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 233\u2013240. 2006.","DOI":"10.1145\/1143844.1143874"},{"key":"1154_CR37","doi-asserted-by":"crossref","unstructured":"Wang H, Liang Q, Hancock JT, Khoshgoftaar TM. Enhancing credit card fraud detection through a novel ensemble feature selection technique. In: 2023 IEEE 24th International Conference on Information Reuse and Integration for Data Science (IRI), pp. 121\u2013126. 2023. IEEE.","DOI":"10.1109\/IRI58017.2023.00028"},{"issue":"1","key":"1154_CR38","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1186\/s40537-023-00724-5","volume":"10","author":"JT Hancock","year":"2023","unstructured":"Hancock JT, Khoshgoftaar TM, Johnson JM. Evaluating classifier performance with highly imbalanced big data. J Big Data. 2023;10(1):42.","journal-title":"J Big Data"},{"key":"1154_CR39","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman L. Random forests. Mach Learn. 2001;45:5\u201332.","journal-title":"Mach Learn"},{"issue":"6","key":"1154_CR40","doi-asserted-by":"publisher","first-page":"386","DOI":"10.1037\/h0042519","volume":"65","author":"F Rosenblatt","year":"1958","unstructured":"Rosenblatt F. The perceptron: a probabilistic model for information storage and organization in the brain. Psychol Rev. 1958;65(6):386.","journal-title":"Psychol Rev"},{"key":"1154_CR41","doi-asserted-by":"crossref","unstructured":"Bauder RA, da Rosa R, Khoshgoftaar TM. Identifying medicare provider fraud with unsupervised machine learning. In: 2018 IEEE International Conference on Information Reuse and Integration (IRI), pp. 285\u2013292. 2018. IEEE.","DOI":"10.1109\/IRI.2018.00051"},{"key":"1154_CR42","unstructured":"scikit-learn: Scikit-learn. http:\/\/scikit-learn.org\/stable\/."},{"key":"1154_CR43","unstructured":"Kingma DP, Ba J. Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980. 2014."},{"key":"1154_CR44","unstructured":"Abdi H, Williams LJ. Tukey\u2019s honestly significant difference (hsd) test. Encyclopedia of Research Design. Thousand Oaks, CA: Sage, 1\u20135. 2010."},{"key":"1154_CR45","volume-title":"Intermediate statistical methods and applications: a computer package approach","author":"M Berenson","year":"1983","unstructured":"Berenson M, Levine D, Goldstein M. Intermediate statistical methods and applications: a computer package approach. Englewood Cliffs: Prentice-Hall; 1983."}],"container-title":["Journal of Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-025-01154-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s40537-025-01154-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-025-01154-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T18:01:59Z","timestamp":1746468119000},"score":1,"resource":{"primary":{"URL":"https:\/\/journalofbigdata.springeropen.com\/articles\/10.1186\/s40537-025-01154-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,5]]},"references-count":45,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1154"],"URL":"https:\/\/doi.org\/10.1186\/s40537-025-01154-1","relation":{},"ISSN":["2196-1115"],"issn-type":[{"value":"2196-1115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,5]]},"assertion":[{"value":"12 November 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare that they have no Conflict of interest.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"111"}}