{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T21:38:39Z","timestamp":1773524319837,"version":"3.50.1"},"reference-count":65,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T00:00:00Z","timestamp":1747094400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T00:00:00Z","timestamp":1747094400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Data Sci Anal"],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s41060-025-00794-z","type":"journal-article","created":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T06:00:28Z","timestamp":1747116028000},"page":"5585-5602","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Enhanced instance selection for large-scale data using integrated clustering and autoencoder techniques"],"prefix":"10.1007","volume":"20","author":[{"given":"Mohammad","family":"Nazari","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hamid","family":"Saadatfar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,13]]},"reference":[{"issue":"70","key":"794_CR1","first-page":"1","volume":"6","author":"D Laney","year":"2001","unstructured":"Laney, D.: 3D data management: controlling data volume, velocity and variety. META group Res. Note 6(70), 1 (2001)","journal-title":"META group Res. Note"},{"issue":"2","key":"794_CR2","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1093\/nsr\/nwt032","volume":"1","author":"J Fan","year":"2014","unstructured":"Fan, J., Han, F., Liu, H.: Challenges of big data analysis. Natl. Sci. Rev. 1(2), 293\u2013314 (2014)","journal-title":"Natl. Sci. Rev."},{"issue":"2000","key":"794_CR3","first-page":"32","volume":"1","author":"DL Donoho","year":"2000","unstructured":"Donoho, D.L.: High-dimensional data analysis: The curses and blessings of dimensionality. AMS Math Chall. Lecture 1(2000), 32 (2000)","journal-title":"AMS Math Chall. Lecture"},{"key":"794_CR4","doi-asserted-by":"crossref","first-page":"131","DOI":"10.1023\/A:1009876119989","volume":"3","author":"F Provost","year":"1999","unstructured":"Provost, F., Kolluri, V.: A survey of methods for scaling up inductive algorithms. Data Min. Knowl. Disc.Knowl. Disc. 3, 131\u2013169 (1999)","journal-title":"Data Min. Knowl. Disc.Knowl. Disc."},{"key":"794_CR5","doi-asserted-by":"crossref","first-page":"15","DOI":"10.3390\/electronics10151757","volume":"10","author":"MJ Basgall","year":"2021","unstructured":"Basgall, M.J., Naiouf, M., Fern\u00e1ndez, A.: FDR2-BD: A fast data reduction recommendation tool for tabular big data classification problems. Electronics 10, 15 (2021)","journal-title":"Electronics"},{"issue":"5\u20136","key":"794_CR6","doi-asserted-by":"crossref","first-page":"410","DOI":"10.1016\/j.artint.2010.01.001","volume":"174","author":"C Garc\u00eda-Osorio","year":"2010","unstructured":"Garc\u00eda-Osorio, C., de Haro-Garc\u00eda, A., Garc\u00eda-Pedrajas, N.: Democratic instance selection: A linear complexity instance selection algorithm based on classifier ensemble concepts. Artif. Intell.. Intell. 174(5\u20136), 410\u2013441 (2010)","journal-title":"Artif. Intell.. Intell."},{"key":"794_CR7","doi-asserted-by":"crossref","first-page":"416","DOI":"10.1016\/j.asoc.2015.07.046","volume":"37","author":"J P\u00e9rez-Rodr\u00edguez","year":"2015","unstructured":"P\u00e9rez-Rodr\u00edguez, J., Arroyo-Pe\u00f1a, A.G., Garc\u00eda-Pedrajas, N.: Simultaneous instance and feature selection and weighting using evolutionary computation: proposal and study. Appl. Soft Comput.Comput. 37, 416\u2013443 (2015)","journal-title":"Appl. Soft Comput.Comput."},{"issue":"1\u20132","key":"794_CR8","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1016\/j.artint.2004.05.009","volume":"159","author":"H Liu","year":"2004","unstructured":"Liu, H., Motoda, H., Yu, L.: A selective sampling approach to active feature selection. Artif. Intell.. Intell. 159(1\u20132), 49\u201374 (2004)","journal-title":"Artif. Intell.. Intell."},{"key":"794_CR9","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1023\/A:1014043630878","volume":"6","author":"H Brighton","year":"2002","unstructured":"Brighton, H., Mellish, C.: Advances in instance selection for instance-based learning algorithms. Data Min. Knowl. Disc.Knowl. Disc. 6, 153\u2013172 (2002)","journal-title":"Data Min. Knowl. Disc.Knowl. Disc."},{"issue":"1","key":"794_CR10","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1016\/j.datak.2006.01.008","volume":"60","author":"JR Cano","year":"2007","unstructured":"Cano, J.R., Herrera, F., Lozano, M.: Evolutionary stratified training set selection for extracting classification rules with trade off precision-interpretability. Data Knowl. Eng.Knowl. Eng. 60(1), 90\u2013108 (2007)","journal-title":"Data Knowl. Eng.Knowl. Eng."},{"issue":"4","key":"794_CR11","doi-asserted-by":"crossref","first-page":"945","DOI":"10.1198\/1061860032544","volume":"12","author":"P Domingos","year":"2003","unstructured":"Domingos, P., Hulten, G.: A general framework for mining massive data streams. J. Comput. Graph. Stat.Comput. Graph. Stat. 12(4), 945\u2013949 (2003)","journal-title":"J. Comput. Graph. Stat.Comput. Graph. Stat."},{"key":"794_CR12","unstructured":"Bank, D., Koenigstein, N. and Giryes, R.: Autoencoders, arXiv preprint arXiv:2003.05991, (2020)."},{"key":"794_CR13","unstructured":"Bourlard, H.: Auto-association by multilayer perceptrons and singular value decomposition,\" IDIAP2000."},{"key":"794_CR14","first-page":"3371","volume":"11","author":"P Vincent","year":"2010","unstructured":"Vincent, P., Larochelle, H., Lajoie, I., Bengio, Y., Manzagol, P.-A.: Stacked denoising autoencoders: Learning useful representations in a deep network with a local denoising criterion. J. Mach. Learn. Res. 11, 3371 (2010)","journal-title":"J. Mach. Learn. Res."},{"key":"794_CR15","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.7551\/mitpress\/7503.003.0147","volume-title":"Advances in Neural Information Processing Systems 19: Proceedings of the 2006 Conference","author":"M Ranzato","year":"2007","unstructured":"Ranzato, M., Poultney, C., Chopra, S., LeCun, Y.: Efficient learning of sparse representations with an energy-based model. In: Sch\u00f6lkopf, B., Platt, J., Hofmann, T. (eds.) Advances in Neural Information Processing Systems 19: Proceedings of the 2006 Conference, pp. 1137\u20131144. The MIT Press (2007). https:\/\/doi.org\/10.7551\/mitpress\/7503.003.0147"},{"key":"794_CR16","volume-title":"Deep learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep learning. MIT press (2016)"},{"key":"794_CR17","doi-asserted-by":"crossref","unstructured":"Angiulli, F.: Fast condensed nearest neighbor rule,\" in Proceedings of the 22nd International Conference on Machine learning, pp. 25\u201332. (2005)","DOI":"10.1145\/1102351.1102355"},{"issue":"2","key":"794_CR18","doi-asserted-by":"crossref","first-page":"44","DOI":"10.1109\/5254.671091","volume":"13","author":"J Yang","year":"1998","unstructured":"Yang, J., Honavar, V.: Feature subset selection using a genetic algorithm. IEEE Intell. Syst. Appl. 13(2), 44\u201349 (1998)","journal-title":"IEEE Intell. Syst. Appl."},{"issue":"1","key":"794_CR19","doi-asserted-by":"crossref","first-page":"46","DOI":"10.1109\/TEVC.2023.3346406","volume":"29","author":"BH Nguyen","year":"2025","unstructured":"Nguyen, B.H., Xue, B., Andreae, P., Zhang, M.: Evolutionary instance selection with multiple partial adaptive classifiers for domain adaptation. IEEE Trans. Evol. Comput.Evol. Comput. 29(1), 46\u201360 (2025)","journal-title":"IEEE Trans. Evol. Comput.Evol. Comput."},{"key":"794_CR20","doi-asserted-by":"crossref","first-page":"124558","DOI":"10.1016\/j.eswa.2024.124558","volume":"255","author":"Q Dai","year":"2024","unstructured":"Dai, Q., Wang, L.-H., Xu, K.-L., Du, T., Chen, L.-F.: Class-overlap detection based on heterogeneous clustering ensemble for multi-class imbalance problem. Expert Syst. Appl. 255, 124558 (2024)","journal-title":"Expert Syst. Appl."},{"key":"794_CR21","doi-asserted-by":"crossref","first-page":"1065","DOI":"10.1016\/j.ejor.2023.10.029","volume":"314","author":"D Akta\u015f","year":"2024","unstructured":"Akta\u015f, D., Lokman, B., \u0130nkaya, T., Dejaegere, G.: Cluster ensemble selection and consensus clustering: A multi-objective optimization approach. Eur. J. Op. Res. 314, 1065\u20131077 (2024)","journal-title":"Eur. J. Op. Res."},{"issue":"7","key":"794_CR22","doi-asserted-by":"crossref","first-page":"1443","DOI":"10.1162\/089976601750264965","volume":"13","author":"B Sch\u00f6lkopf","year":"2001","unstructured":"Sch\u00f6lkopf, B., Platt, J.C., Shawe-Taylor, J., Smola, A.J., Williamson, R.C.: Estimating the support of a high-dimensional distribution. Neural Comput.Comput. 13(7), 1443\u20131471 (2001)","journal-title":"Neural Comput.Comput."},{"key":"794_CR23","volume-title":"Finding groups in data: an introduction to cluster analysis","author":"L Kaufman","year":"2009","unstructured":"Kaufman, L., Rousseeuw, P.J.: Finding groups in data: an introduction to cluster analysis. John Wiley & Sons (2009)"},{"key":"794_CR24","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton, G.E., Salakhutdinov, R.R.: Reducing the dimensionality of data with neural networks. Science 313, 504\u2013507 (2006)","journal-title":"Science"},{"key":"794_CR25","doi-asserted-by":"crossref","first-page":"12397","DOI":"10.1007\/s00521-024-09676-0","volume":"36","author":"Q Dai","year":"2024","unstructured":"Dai, Q., Liu, J.-W., Wang, L.-H.: Imbalanced instance selection based on Laplacian matrix decomposition with weighted k-nearest-neighbor graph. Neural Comput. Appl.Comput. Appl. 36, 12397\u201312425 (2024)","journal-title":"Neural Comput. Appl.Comput. Appl."},{"key":"794_CR26","doi-asserted-by":"crossref","unstructured":"Dhillon, I. S.: Co-clustering documents and words using bipartite spectral graph partitioning,\" in Proceedings of the seventh ACM SIGKDD international conference on Knowledge discovery and data mining, pp. 269\u2013274 (2001)","DOI":"10.1145\/502512.502550"},{"key":"794_CR27","doi-asserted-by":"crossref","unstructured":"Sculley, D.: Web-scale k-means clustering,\" in Proceedings of the 19th international conference on World wide web, 2010, pp. 1177\u20131178.","DOI":"10.1145\/1772690.1772862"},{"key":"794_CR28","unstructured":"Ertoz, L., Steinbach, M. and Kumar, V.: A new shared nearest neighbor clustering algorithm and its applications,\" in Workshop on clustering high dimensional data and its applications at 2nd SIAM international conference on data mining, vol. 8. (2002)"},{"issue":"11","key":"794_CR29","doi-asserted-by":"crossref","first-page":"3096","DOI":"10.1002\/cpe.3580","volume":"28","author":"C Deng","year":"2016","unstructured":"Deng, C., et al.: A MapReduce-based parallel K-means clustering for large-scale CIM data verification. Concurr. Comput.: Pr. Exp. 28(11), 3096\u20133114 (2016)","journal-title":"Concurr. Comput.: Pr. Exp."},{"key":"794_CR30","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1007\/978-3-319-94301-5_16","volume-title":"Big Data \u2013 BigData 2018: 7th International Congress, Held as Part of the Services Conference Federation, SCF 2018, Seattle, WA, USA, June 25\u201330, 2018, Proceedings","author":"AA Hai","year":"2018","unstructured":"Hai, A.A., Forouraghi, B.: On scalability of distributed machine learning with big data on apache spark. In: Francis, Y.L., Chin, C.L., Chen, P., Khan, L., Lee, K., Zhang, L.-J. (eds.) Big Data \u2013 BigData 2018: 7th International Congress, Held as Part of the Services Conference Federation, SCF 2018, Seattle, WA, USA, June 25\u201330, 2018, Proceedings, pp. 209\u2013219. Springer International Publishing, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-94301-5_16"},{"key":"794_CR31","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-022-00640-0","author":"J Zhai","year":"2022","unstructured":"Zhai, J., Song, D.: Optimal instance subset selection from big data using genetic algorithm and open source framework. J. Big Data (2022). https:\/\/doi.org\/10.1186\/s40537-022-00640-0","journal-title":"J. Big Data"},{"key":"794_CR32","doi-asserted-by":"crossref","first-page":"121269","DOI":"10.1016\/j.eswa.2023.121269","volume":"236","author":"C Xu","year":"2024","unstructured":"Xu, C., Zhang, S.: A Genetic Algorithm-based sequential instance selection framework for ensemble learning. Expert Syst. Appl. 236, 121269 (2024)","journal-title":"Expert Syst. Appl."},{"key":"794_CR33","doi-asserted-by":"crossref","first-page":"111745","DOI":"10.1016\/j.knosys.2024.111745","volume":"294","author":"M Moradi","year":"2024","unstructured":"Moradi, M., Hamidzadeh, J.: Handling class imbalance and overlap with a Hesitation-based instance selection method. Knowl.-Based Syst..-Based Syst. 294, 111745 (2024)","journal-title":"Knowl.-Based Syst..-Based Syst."},{"key":"794_CR34","doi-asserted-by":"publisher","DOI":"10.1007\/s43674-022-00033-z","author":"J Zhai","year":"2022","unstructured":"Zhai, J., Huang, Y.: Instance selection for big data based on locally sensitive hashing and double-voting mechanism. Adv. Comput. Intell. (2022). https:\/\/doi.org\/10.1007\/s43674-022-00033-z","journal-title":"Adv. Comput. Intell."},{"issue":"6","key":"794_CR35","first-page":"6287","volume":"36","author":"Q Chen","year":"2022","unstructured":"Chen, Q., Cao, F., Xing, Y., Liang, J.: Instance selection: A Bayesian decision theory perspective. Proc. AAAI Conf. Artif. Intell. 36(6), 6287\u20136294 (2022)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"794_CR36","doi-asserted-by":"publisher","first-page":"809","DOI":"10.1007\/978-3-319-71246-8_49","volume-title":"Machine Learning and Knowledge Discovery in Databases: European Conference, ECML PKDD 2017, Skopje, Macedonia, September 18\u201322, 2017, Proceedings, Part II","author":"K Tian","year":"2017","unstructured":"Tian, K., Zhou, S., Guan, J.: Deepcluster: A general clustering framework based on deep learning. In: Ceci, M., Hollm\u00e9n, J., Todorovski, L., Vens, C., D\u017eeroski, S. (eds.) Machine Learning and Knowledge Discovery in Databases: European Conference, ECML PKDD 2017, Skopje, Macedonia, September 18\u201322, 2017, Proceedings, Part II, pp. 809\u2013825. Springer International Publishing, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-71246-8_49"},{"key":"794_CR37","unstructured":"Xie, J., Girshick, R. and Farhadi, A.: Unsupervised deep embedding for clustering analysis,\" in International conference on machine learning, 2016, pp. 478\u2013487: PMLR."},{"key":"794_CR38","first-page":"1753","volume":"17","author":"X Guo","year":"2017","unstructured":"Guo, X., Gao, L., Liu, X., Yin, J.: Improved deep embedded clustering with local structure preservation. Ijcai 17, 1753\u20131759 (2017)","journal-title":"Ijcai"},{"key":"794_CR39","unstructured":"Lopez, R., Regier, J., Jordan, M. I. and Yosef, N.: Information constraints on auto-encoding variational bayes,\" Advances in neural information processing systems, vol. 31, (2018)."},{"issue":"1","key":"794_CR40","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1109\/TKDE.2020.2981333","volume":"34","author":"Z Zhang","year":"2020","unstructured":"Zhang, Z., Cui, P., Zhu, W.: Deep learning on graphs: a survey. IEEE Trans. Knowl. Data Eng.Knowl. Data Eng. 34(1), 249\u2013270 (2020)","journal-title":"IEEE Trans. Knowl. Data Eng.Knowl. Data Eng."},{"issue":"1","key":"794_CR41","first-page":"3742536","volume":"2021","author":"S Yu","year":"2021","unstructured":"Yu, S., Liu, J., Han, Z., Li, Y., Tang, Y., Wu, C.: Representation learning based on autoencoder and deep adaptive clustering for image clustering. Math. Probl. Eng.Probl. Eng. 2021(1), 3742536 (2021)","journal-title":"Math. Probl. Eng.Probl. Eng."},{"issue":"2","key":"794_CR42","doi-asserted-by":"crossref","first-page":"1075","DOI":"10.1007\/s00500-021-05934-8","volume":"27","author":"X Huang","year":"2023","unstructured":"Huang, X., Hu, Z., Lin, L.: Deep clustering based on embedded auto-encoder. Soft. Comput.Comput. 27(2), 1075\u20131090 (2023)","journal-title":"Soft. Comput.Comput."},{"issue":"1","key":"794_CR43","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1109\/TKDE.2023.3283425","volume":"36","author":"Y Zhiwen","year":"2024","unstructured":"Zhiwen, Y., Zhong, Z., Yang, K., Wenming Cao, C.L., Chen, P.: Broad learning autoencoder with graph structure for data clustering. IEEE Trans. Knowl. Data Eng.Knowl. Data Eng. 36(1), 49\u201361 (2024). https:\/\/doi.org\/10.1109\/TKDE.2023.3283425","journal-title":"IEEE Trans. Knowl. Data Eng.Knowl. Data Eng."},{"issue":"6","key":"794_CR44","doi-asserted-by":"crossref","first-page":"2055","DOI":"10.1109\/TSC.2019.2907247","volume":"14","author":"DB Rawat","year":"2019","unstructured":"Rawat, D.B., Doku, R., Garuba, M.: Cybersecurity in big data era: From securing big data to data-driven security. IEEE Trans. Serv. Comput.Comput. 14(6), 2055\u20132072 (2019)","journal-title":"IEEE Trans. Serv. Comput.Comput."},{"issue":"1\u20139","key":"794_CR45","first-page":"2023","volume":"3","author":"F Del Giorgio Solfa","year":"2023","unstructured":"Del Giorgio Solfa, F., Simonato, F.R.: Big data analytics in healthcare: exploring the role of machine learning in predicting patient outcomes and improving healthcare delivery. Int. J. Comput. Inf. Manuf. (Ijcim) 3(1\u20139), 2023 (2023)","journal-title":"Int. J. Comput. Inf. Manuf. (Ijcim)"},{"key":"794_CR46","doi-asserted-by":"publisher","first-page":"248","DOI":"10.1007\/978-3-540-74976-9_25","volume-title":"Knowledge Discovery in Databases: PKDD 2007","author":"Y Song","year":"2007","unstructured":"Song, Y., Huang, J., Zhou, D., HongyuanZha, C., Giles, L.: Iknn: Informative k-nearest neighbor pattern classification. In: Kok, J.N., Koronacki, J., Lopez, R., de Mantaras, S., Matwin, D.M., Skowron, A. (eds.) Knowledge Discovery in Databases: PKDD 2007, pp. 248\u2013264. Springer Berlin Heidelberg, Berlin, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-74976-9_25"},{"issue":"3","key":"794_CR47","doi-asserted-by":"publisher","first-page":"308","DOI":"10.3390\/j4030024","volume":"4","author":"M Rubbo","year":"2021","unstructured":"Rubbo, M., Silva, L.A.: Filtering-based instance selection method for overlapping problem in imbalanced datasets. J 4(3), 308\u2013327 (2021). https:\/\/doi.org\/10.3390\/j4030024","journal-title":"J"},{"issue":"12","key":"794_CR48","doi-asserted-by":"crossref","first-page":"9294","DOI":"10.1109\/TKDE.2024.3419834","volume":"36","author":"K Yang","year":"2024","unstructured":"Yang, K., Yu, Z., Chen, W., Liang, Z., Chen, C.L.P.: Solving the imbalanced problem by metric learning and oversampling. IEEE Trans. Knowl. Data Eng.Knowl. Data Eng. 36(12), 9294\u20139307 (2024)","journal-title":"IEEE Trans. Knowl. Data Eng.Knowl. Data Eng."},{"key":"794_CR49","doi-asserted-by":"crossref","unstructured":"Fawzy, M., Tawfik, N. S. and Saleh, S. N.: Cluster-based optimization of training data selection for image copy detection models,\" in 2024 14th International Conference on Electrical Engineering (ICEENG), pp. 225\u2013230 (2024).","DOI":"10.1109\/ICEENG58856.2024.10566392"},{"issue":"3","key":"794_CR50","doi-asserted-by":"crossref","first-page":"765","DOI":"10.1007\/s13042-021-01327-3","volume":"13","author":"S An","year":"2021","unstructured":"An, S., Hu, Q., Wang, C., Guo, G., Li, P.: Data reduction based on NN-kNN measure for NN classification and regression. Int. J. Mach. Learn. Cybern.Cybern. 13(3), 765\u2013781 (2021)","journal-title":"Int. J. Mach. Learn. Cybern.Cybern."},{"key":"794_CR51","unstructured":"Z. Chai and Y. Li, 2022."},{"key":"794_CR52","doi-asserted-by":"crossref","unstructured":"Chai, Z. and Li, Y.: Sample reduction algorithm based on classification contribution, Research Square, 2022 (2022).","DOI":"10.21203\/rs.3.rs-1780707\/v1"},{"key":"794_CR53","doi-asserted-by":"crossref","first-page":"113269","DOI":"10.1016\/j.eswa.2020.113269","volume":"150","author":"GD Cavalcanti","year":"2020","unstructured":"Cavalcanti, G.D., Soares, R.J.: Ranking-based instance selection for pattern classification. Expert Syst. Appl. 150, 113269 (2020)","journal-title":"Expert Syst. Appl."},{"key":"794_CR54","doi-asserted-by":"publisher","first-page":"115293","DOI":"10.1016\/j.eswa.2021.115293","volume":"183","author":"A Shokrzade","year":"2021","unstructured":"Shokrzade, A., Ramezani, M., Tab, F.A., Mohammad, M.A.: A novel extreme learning machine based kNN classification method for dealing with big data. Expert Syst. Appl. 183, 115293 (2021). https:\/\/doi.org\/10.1016\/j.eswa.2021.115293","journal-title":"Expert Syst. Appl."},{"issue":"5","key":"794_CR55","doi-asserted-by":"crossref","first-page":"890","DOI":"10.1109\/TNN.2009.2018547","volume":"20","author":"HA Fayed","year":"2009","unstructured":"Fayed, H.A., Atiya, A.F.: A novel template reduction approach for the K-nearest neighbor method. IEEE Trans. Neural Netw.Netw. 20(5), 890\u2013896 (2009)","journal-title":"IEEE Trans. Neural Netw.Netw."},{"issue":"3","key":"794_CR56","doi-asserted-by":"crossref","first-page":"217","DOI":"10.1002\/sam.11508","volume":"14","author":"VR Joseph","year":"2021","unstructured":"Joseph, V.R., Mak, S.: Supervised compression of big data. Stat. Anal. Data Mining: ASA Data Sci. J. 14(3), 217\u2013229 (2021)","journal-title":"Stat. Anal. Data Mining: ASA Data Sci. J."},{"issue":"3","key":"794_CR57","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s13748-017-0117-5","volume":"6","author":"\u00c1 Arnaiz-Gonz\u00e1lez","year":"2017","unstructured":"Arnaiz-Gonz\u00e1lez, \u00c1., Gonz\u00e1lez-Rogel, A., D\u00edez-Pastor, J.-F., L\u00f3pez-Nozal, C.: MR-DIS: democratic instance selection for big data by MapReduce. Prog. Artif. Intell. 6(3), 211\u2013219 (2017)","journal-title":"Prog. Artif. Intell."},{"key":"794_CR58","doi-asserted-by":"crossref","first-page":"123","DOI":"10.1016\/j.ijar.2021.08.006","volume":"138","author":"C Gong","year":"2021","unstructured":"Gong, C., Su, Z.-G., Wang, P.-H., Wang, Q., You, Y.: Evidential instance selection for K-nearest neighbor classification of big data. Int. J. Approx. Reason. 138, 123\u2013144 (2021)","journal-title":"Int. J. Approx. Reason."},{"key":"794_CR59","doi-asserted-by":"crossref","first-page":"197334","DOI":"10.1007\/s11704-024-40004-w","volume":"19","author":"Z Yu","year":"2024","unstructured":"Yu, Z., Dong, Z., Yu, C., Yang, K., Fan, Z., Chen, C.L.P.: A review on multi-view learning. Front. Comput. Sci. 19, 197334 (2024)","journal-title":"Front. Comput. Sci."},{"issue":"11","key":"794_CR60","doi-asserted-by":"publisher","first-page":"7129","DOI":"10.1109\/TSMC.2024.3448206","volume":"54","author":"\u00c1B Csap\u00f3","year":"2024","unstructured":"Csap\u00f3, \u00c1.B.: Subsample, generate, and stack using the spiral discovery method: a framework for autoregressive data compression and augmentation. IEEE Trans. Syst., Man, Cybern.: Syst. 54(11), 7129\u20137142 (2024). https:\/\/doi.org\/10.1109\/TSMC.2024.3448206","journal-title":"IEEE Trans. Syst., Man, Cybern.: Syst."},{"issue":"3","key":"794_CR61","first-page":"467","volume":"29","author":"S \u0141ukasik","year":"2019","unstructured":"\u0141ukasik, S., Lalik, K., Sarna, P., Kowalski, P.A., Charytanowicz, M., Kulczycki, P.: Efficient astronomical data condensation using approximate nearest neighbors. Int. J. Appl. Math. Comput. Sci.Comput. Sci. 29(3), 467\u2013476 (2019)","journal-title":"Int. J. Appl. Math. Comput. Sci.Comput. Sci."},{"key":"794_CR62","first-page":"255","volume":"17","author":"J Derrac","year":"2015","unstructured":"Derrac, J., Garcia, S., Sanchez, L., Herrera, F.: Keel data-mining software tool: Data set repository, integration of algorithms and experimental analysis framework. J. Mult. Valued Logic Soft Comput 17, 255\u2013287 (2015)","journal-title":"J. Mult. Valued Logic Soft Comput"},{"key":"794_CR63","doi-asserted-by":"crossref","unstructured":"Alejo, R., Sotoca, J. M., Valdovinos, R. M. and Toribio, P.: Edited nearest neighbor rule for improving neural networks classifications,\" in Advances in Neural Networks-ISNN 2010: 7th International Symposium on Neural Networks, ISNN 2010, Shanghai, China, June 6\u20139, 2010, Proceedings, Part I 7, 2010, pp. 303\u2013310: Springer.","DOI":"10.1007\/978-3-642-13278-0_39"},{"key":"794_CR64","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1023\/A:1007626913721","volume":"38","author":"DR Wilson","year":"2000","unstructured":"Wilson, D.R., Martinez, T.R.: Reduction techniques for instance-based learning algorithms. Mach. Learn. 38, 257\u2013286 (2000)","journal-title":"Mach. Learn."},{"issue":"17","key":"794_CR65","doi-asserted-by":"crossref","first-page":"6894","DOI":"10.1016\/j.eswa.2013.06.053","volume":"40","author":"GD Cavalcanti","year":"2013","unstructured":"Cavalcanti, G.D., Ren, T.I., Pereira, C.L.: ATISA: adaptive threshold-based instance selection algorithm. Expert Syst. Appl. 40(17), 6894\u20136900 (2013)","journal-title":"Expert Syst. Appl."}],"container-title":["International Journal of Data Science and Analytics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-025-00794-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s41060-025-00794-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-025-00794-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,27]],"date-time":"2025-09-27T12:18:01Z","timestamp":1758975481000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s41060-025-00794-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,13]]},"references-count":65,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["794"],"URL":"https:\/\/doi.org\/10.1007\/s41060-025-00794-z","relation":{},"ISSN":["2364-415X","2364-4168"],"issn-type":[{"value":"2364-415X","type":"print"},{"value":"2364-4168","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,13]]},"assertion":[{"value":"17 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}