{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T15:34:53Z","timestamp":1743089693229,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031234910"},{"type":"electronic","value":"9783031234927"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-23492-7_30","type":"book-chapter","created":{"date-parts":[[2023,1,23]],"date-time":"2023-01-23T10:03:37Z","timestamp":1674468217000},"page":"353-362","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["The Impact of\u00a0Data Preprocessing on\u00a0Prediction Effectiveness"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5222-8101","authenticated-orcid":false,"given":"Adam","family":"Kiersztyn","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1957-1797","authenticated-orcid":false,"given":"Krystyna","family":"Kiersztyn","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,1,24]]},"reference":[{"issue":"3","key":"30_CR1","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1080\/00401706.1980.10486163","volume":"22","author":"M Aitkin","year":"1980","unstructured":"Aitkin, M., Wilson, G.T.: Mixture models, outliers, and the EM algorithm. Technometrics 22(3), 325\u2013331 (1980)","journal-title":"Technometrics"},{"issue":"16","key":"30_CR2","first-page":"4102","volume":"12","author":"SA Alasadi","year":"2017","unstructured":"Alasadi, S.A., Bhaya, W.S.: Review of data preprocessing techniques in data mining. J. Eng. Appl. Sci. 12(16), 4102\u20134107 (2017)","journal-title":"J. Eng. Appl. Sci."},{"issue":"3","key":"30_CR3","doi-asserted-by":"publisher","first-page":"595","DOI":"10.1007\/s11629-018-5168-y","volume":"16","author":"A Arabameri","year":"2019","unstructured":"Arabameri, A., Pradhan, B., Rezaei, K., Sohrabi, M., Kalantari, Z.: Gis-based landslide susceptibility mapping using numerical risk factor bivariate model and its ensemble with linear multivariate regression and boosted regression tree algorithms. J. Mt. Sci. 16(3), 595\u2013618 (2019)","journal-title":"J. Mt. Sci."},{"issue":"2\u20133","key":"30_CR4","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1016\/S0888-613X(02)00077-4","volume":"32","author":"MR Berthold","year":"2003","unstructured":"Berthold, M.R.: Mixed fuzzy rule formation. Int. J. Approx. Reason. 32(2\u20133), 67\u201384 (2003)","journal-title":"Int. J. Approx. Reason."},{"key":"30_CR5","doi-asserted-by":"publisher","unstructured":"Breunig, M.M., Kriegel, H.P., Ng, R.T., Sander, J.: LOF: identifying density-based local outliers. In: Proceedings of the 2000 ACM SIGMOD International Conference on Management of Data, pp. 93\u2013104 (2000). https:\/\/doi.org\/10.1145\/342009.335388","DOI":"10.1145\/342009.335388"},{"issue":"2","key":"30_CR6","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1023\/A:1009869804967","volume":"3","author":"D Coppersmith","year":"1999","unstructured":"Coppersmith, D., Hong, S.J., Hosking, J.R.: Partitioning nominal attributes in decision trees. Data Min. Knowl. Discov. 3(2), 197\u2013217 (1999)","journal-title":"Data Min. Knowl. Discov."},{"key":"30_CR7","unstructured":"Donovan, B., Work, D.: New York city taxi trip data (2010\u20132013) (2014). https:\/\/doi.org\/10.13012\/J8PN93H8"},{"issue":"4","key":"30_CR8","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1016\/S0167-9473(01)00065-2","volume":"38","author":"JH Friedman","year":"2002","unstructured":"Friedman, J.H.: Stochastic gradient boosting. Comput. Stat. Data Anal. 38(4), 367\u2013378 (2002)","journal-title":"Comput. Stat. Data Anal."},{"issue":"1","key":"30_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10115-011-0463-8","volume":"33","author":"F Kamiran","year":"2012","unstructured":"Kamiran, F., Calders, T.: Data preprocessing techniques for classification without discrimination. Knowl. Inf. Syst. 33(1), 1\u201333 (2012)","journal-title":"Knowl. Inf. Syst."},{"key":"30_CR10","doi-asserted-by":"crossref","unstructured":"Karczmarek, P., Kiersztyn, A., Pedrycz, W.: Fuzzy set-based isolation forest. In: 2020 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE), pp. 1\u20136. IEEE (2020)","DOI":"10.1109\/FUZZ48607.2020.9177718"},{"key":"30_CR11","doi-asserted-by":"crossref","unstructured":"Karczmarek, P., Kiersztyn, A., Pedrycz, W., Al, E.: K-means-based isolation forest. Knowl.-Based Syst. 195, 105659 (2020)","DOI":"10.1016\/j.knosys.2020.105659"},{"key":"30_CR12","doi-asserted-by":"publisher","first-page":"107354","DOI":"10.1016\/j.asoc.2021.107354","volume":"106","author":"P Karczmarek","year":"2021","unstructured":"Karczmarek, P., Kiersztyn, A., Pedrycz, W., Czerwi\u0144ski, D.: Fuzzy c-means-based isolation forest. Appl. Soft Comput. 106, 107354 (2021)","journal-title":"Appl. Soft Comput."},{"key":"30_CR13","doi-asserted-by":"crossref","unstructured":"Kiersztyn, A., Karczmarek, P., Kiersztyn, K., Pedrycz, W.: The concept of detecting and classifying anomalies in large data sets on a basis of information granules. In: 2020 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE), pp. 1\u20137. IEEE (2020)","DOI":"10.1109\/FUZZ48607.2020.9177668"},{"issue":"8","key":"30_CR14","doi-asserted-by":"publisher","first-page":"2850","DOI":"10.1109\/TFUZZ.2021.3076265","volume":"30","author":"A Kiersztyn","year":"2021","unstructured":"Kiersztyn, A., Karczmarek, P., Kiersztyn, K., Pedrycz, W.: Detection and classification of anomalies in large data sets on the basis of information granules. IEEE Trans. Fuzzy Syst. 30(8), 2850\u20132860 (2021)","journal-title":"IEEE Trans. Fuzzy Syst."},{"key":"30_CR15","doi-asserted-by":"crossref","unstructured":"Kiersztyn, A., et al.: Data imputation in related time series using fuzzy set-based techniques. In: 2020 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE), pp. 1\u20138. IEEE (2020)","DOI":"10.1109\/FUZZ48607.2020.9177617"},{"key":"30_CR16","doi-asserted-by":"crossref","unstructured":"Kiersztyn, A., et al.: A comprehensive analysis of the impact of selecting the training set elements on the correctness of classification for highly variable ecological data. In: 2021 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE), pp. 1\u20136. IEEE (2021)","DOI":"10.1109\/FUZZ45933.2021.9494399"},{"key":"30_CR17","doi-asserted-by":"crossref","unstructured":"Kiersztyn, K.: Intuitively adaptable outlier detector. Stat. Anal. Data Min.: ASAData Sci. J. 15(4), 463\u2013479 (2021)","DOI":"10.1002\/sam.11562"},{"key":"30_CR18","doi-asserted-by":"publisher","unstructured":"Liu, F.T., Ting, K.M., Zhou, Z.H.: Isolation-based anomaly detection. ACM Trans. Knowl. Discov. Data 6(1) (2012). https:\/\/doi.org\/10.1145\/2133360.2133363","DOI":"10.1145\/2133360.2133363"},{"key":"30_CR19","doi-asserted-by":"publisher","first-page":"109964","DOI":"10.1016\/j.ecolmodel.2022.109964","volume":"468","author":"R \u0141opucki","year":"2022","unstructured":"\u0141opucki, R., Kiersztyn, A., Pitucha, G., Kitowski, I.: Handling missing data in ecological studies: ignoring gaps in the dataset can distort the inference. Ecol. Modell. 468, 109964 (2022)","journal-title":"Ecol. Modell."},{"key":"30_CR20","doi-asserted-by":"publisher","first-page":"63279","DOI":"10.1109\/ACCESS.2018.2877269","volume":"6","author":"MS Osman","year":"2018","unstructured":"Osman, M.S., Abu-Mahfouz, A.M., Page, P.R.: A survey on data imputation techniques: water distribution system as a use case. IEEE Access 6, 63279\u201363291 (2018)","journal-title":"IEEE Access"},{"issue":"3","key":"30_CR21","doi-asserted-by":"publisher","first-page":"711","DOI":"10.1007\/s11222-016-9649-y","volume":"27","author":"J Piironen","year":"2017","unstructured":"Piironen, J., Vehtari, A.: Comparison of Bayesian predictive methods for model selection. Stat. Comput. 27(3), 711\u2013735 (2017)","journal-title":"Stat. Comput."},{"key":"30_CR22","doi-asserted-by":"crossref","unstructured":"Priyanka, K.D.: Decision tree classifier: a detailed survey. Int. J. Inf. Decis. Sci. 12(3), 246\u2013269 (2020)","DOI":"10.1504\/IJIDS.2020.108141"},{"key":"30_CR23","unstructured":"Raval, K.M.: Data mining techniques. Int. J. Adv. Res. Comput. Sci. Softw. Eng. 2(10) (2012)"},{"issue":"3","key":"30_CR24","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1080\/00401706.1999.10485670","volume":"41","author":"PJ Rousseeuw","year":"1999","unstructured":"Rousseeuw, P.J., Driessen, K.V.: A fast algorithm for the minimum covariance determinant estimator. Technometrics 41(3), 212\u2013223 (1999). https:\/\/doi.org\/10.1080\/00401706.1999.10485670","journal-title":"Technometrics"},{"issue":"1","key":"30_CR25","first-page":"7","volume":"5","author":"S Vijayarani","year":"2015","unstructured":"Vijayarani, S., Ilamathi, M.J., Nithya, M., et al.: Preprocessing techniques for text mining-an overview. Int. J. Comput. Sci. Commun. Netw. 5(1), 7\u201316 (2015)","journal-title":"Int. J. Comput. Sci. Commun. Netw."},{"key":"30_CR26","doi-asserted-by":"publisher","first-page":"107964","DOI":"10.1109\/ACCESS.2019.2932769","volume":"7","author":"H Wang","year":"2019","unstructured":"Wang, H., Bah, M.J., Hammad, M.: Progress in outlier detection techniques: a survey. IEEE Access 7, 107964\u2013108000 (2019)","journal-title":"IEEE Access"},{"issue":"1\u20132","key":"30_CR27","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1016\/j.jhydrol.2010.05.040","volume":"389","author":"C Wu","year":"2010","unstructured":"Wu, C., Chau, K.W., Fan, C.: Prediction of rainfall time series using modular artificial neural networks coupled with data-preprocessing techniques. J. Hydrol. 389(1\u20132), 146\u2013167 (2010)","journal-title":"J. Hydrol."},{"key":"30_CR28","unstructured":"Zhang, Z.: Missing data imputation: focusing on single imputation. Ann. Transl. Med. 4(1) (2016)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence and Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-23492-7_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,3]],"date-time":"2023-02-03T09:11:41Z","timestamp":1675415501000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-23492-7_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031234910","9783031234927"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-23492-7_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"24 January 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICAISC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence and Soft Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zakopane","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 June 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 June 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icaisc2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icaisc.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}