{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:46:46Z","timestamp":1760316406389,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032078834","type":"print"},{"value":"9783032078841","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-07884-1_14","type":"book-chapter","created":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T16:22:09Z","timestamp":1760286129000},"page":"269-288","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["How Dataset Diversity Affects Generalization in\u00a0ML-Based NIDS"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0503-8986","authenticated-orcid":false,"given":"Benoit","family":"Nougnanke","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8150-6617","authenticated-orcid":false,"given":"Gregory","family":"Blanc","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4423-5720","authenticated-orcid":false,"given":"Thomas","family":"Robert","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,13]]},"reference":[{"key":"14_CR1","doi-asserted-by":"publisher","unstructured":"Andresini, G., Pendlebury, F., Pierazzi, F., Loglisci, C., Appice, A., Cavallaro, L.: Insomnia: towards concept-drift robustness in network intrusion detection. In: Proceedings of the 14th ACM Workshop on Artificial Intelligence and Security, pp. 111\u2013122 (2021). https:\/\/doi.org\/10.1145\/3474369.3486864","DOI":"10.1145\/3474369.3486864"},{"key":"14_CR2","doi-asserted-by":"publisher","unstructured":"Apruzzese, G., Laskov, P., Schneider, J.: Sok: pragmatic assessment of machine learning for network intrusion detection. In: 2023 IEEE 8th European Symposium on Security and Privacy (EuroS &P), pp. 592\u2013614. IEEE (2023). https:\/\/doi.org\/10.1109\/EUROSP57164.2023.00042","DOI":"10.1109\/EUROSP57164.2023.00042"},{"issue":"4","key":"14_CR3","doi-asserted-by":"publisher","first-page":"5152","DOI":"10.1109\/TNSM.2022.3157344","volume":"19","author":"G Apruzzese","year":"2022","unstructured":"Apruzzese, G., Pajola, L., Conti, M.: The cross-evaluation of machine learning-based network intrusion detection systems. IEEE Trans. Netw. Serv. Manage. 19(4), 5152\u20135169 (2022). https:\/\/doi.org\/10.1109\/TNSM.2022.3157344","journal-title":"IEEE Trans. Netw. Serv. Manage."},{"key":"14_CR4","unstructured":"Arp, D., et al.: Dos and don\u2019ts of machine learning in computer security. In: 31st USENIX Security Symposium (USENIX Security 22), pp. 3971\u20133988 (2022)"},{"key":"14_CR5","doi-asserted-by":"publisher","unstructured":"Ayoubi, S., Blanc, G., Jmila, H., Silverston, T., Tixeuil, S.: Data-driven evaluation of intrusion detectors: a methodological framework. In: International Symposium on Foundations and Practice of Security, pp. 142\u2013157. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-30122-3_9","DOI":"10.1007\/978-3-031-30122-3_9"},{"issue":"1","key":"14_CR6","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1186\/S40537-020-00390-X","volume":"8","author":"S Bagui","year":"2021","unstructured":"Bagui, S., Li, K.: Resampling imbalanced data for network intrusion detection datasets. J. Big Data 8(1), 6 (2021). https:\/\/doi.org\/10.1186\/S40537-020-00390-X","journal-title":"J. Big Data"},{"key":"14_CR7","unstructured":"Bandalos, D.L.: Measurement theory and applications for the social sciences. Guilford Publications (2017)"},{"key":"14_CR8","unstructured":"Budach, L., et al.: The effects of data quality on machine learning performance. arXiv preprint arXiv:2207.14529 (2022)"},{"key":"14_CR9","doi-asserted-by":"publisher","unstructured":"Catillo, M., Vecchio, A.D., Pecchia, A., Villano, U.: A critique on the use of machine learning on public datasets for intrusion detection. In: Quality of Information and Communications Technology (2021). https:\/\/doi.org\/10.1007\/978-3-030-85347-1_19, https:\/\/api.semanticscholar.org\/CorpusID:237378606","DOI":"10.1007\/978-3-030-85347-1_19"},{"key":"14_CR10","unstructured":"Chen, M., Goel, K., Sohoni, N.S., Poms, F., Fatahalian, K., R\u00e9, C.: Mandoline: model evaluation under distribution shift. In: International Conference on Machine Learning, pp. 1617\u20131629. PMLR (2021)"},{"issue":"CoNEXT4","key":"14_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3696407","volume":"2","author":"J C\u00fcppers","year":"2024","unstructured":"C\u00fcppers, J., Schoen, A., Blanc, G., Gimenez, P.F.: Flowchronicle: synthetic network flow generation through pattern set mining. Proc. ACM Netw. 2(CoNEXT4), 1\u201320 (2024). https:\/\/doi.org\/10.1145\/3696407","journal-title":"Proc. ACM Netw."},{"key":"14_CR12","doi-asserted-by":"publisher","unstructured":"D\u2019hooge, L., Wauters, T., Volckaert, B., De\u00a0Turck, F.: Classification hardness for supervised learners on 20 years of intrusion detection data. iEEE Access 7, 167455\u2013167469 (2019). https:\/\/doi.org\/10.1109\/ACCESS.2019.2953451","DOI":"10.1109\/ACCESS.2019.2953451"},{"key":"14_CR13","doi-asserted-by":"publisher","first-page":"863","DOI":"10.1613\/JAIR.1.11192","volume":"61","author":"A Fern\u00e1ndez","year":"2018","unstructured":"Fern\u00e1ndez, A., Garcia, S., Herrera, F., Chawla, N.V.: Smote for learning from imbalanced data: progress and challenges, marking the 15-year anniversary. J. Artif. Intell. Res. 61, 863\u2013905 (2018). https:\/\/doi.org\/10.1613\/JAIR.1.11192","journal-title":"J. Artif. Intell. Res."},{"key":"14_CR14","doi-asserted-by":"publisher","unstructured":"Flood, R., Aspinall, D.: Measuring the complexity of benchmark nids datasets via spectral analysis. In: 2024 IEEE European Symposium on Security and Privacy Workshops (EuroS &PW), pp. 335\u2013341. IEEE (2024). https:\/\/doi.org\/10.1109\/EUROSPW61312.2024.00043","DOI":"10.1109\/EUROSPW61312.2024.00043"},{"key":"14_CR15","doi-asserted-by":"publisher","unstructured":"Flood, R., Engelen, G., Aspinall, D., Desmet, L.: Bad design smells in benchmark nids datasets. In: 2024 IEEE 9th European Symposium on Security and Privacy (EuroS &P), pp. 658\u2013675. IEEE (2024). https:\/\/doi.org\/10.1109\/EUROSP60621.2024.00042","DOI":"10.1109\/EUROSP60621.2024.00042"},{"key":"14_CR16","unstructured":"Friedman, D., Dieng, A.B.: The vendi score: a diversity evaluation metric for machine learning. Trans. Mach. Learn. Res. (2023). https:\/\/openreview.net\/forum?id=g97OHbQyk1"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Gharib, A., Sharafaldin, I., Lashkari, A.H., Ghorbani, A.A.: An evaluation framework for intrusion detection dataset. In: 2016 International Conference on Information Science and Security (ICISS), pp.\u00a01\u20136. IEEE (2016)","DOI":"10.1109\/ICISSEC.2016.7885840"},{"key":"14_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2025.104510","volume":"156","author":"P Goldschmidt","year":"2025","unstructured":"Goldschmidt, P., Chud\u00e1, D.: Network intrusion datasets: a survey, limitations, and recommendations. Comput. Sec. 156, 104510 (2025). https:\/\/doi.org\/10.1016\/j.cose.2025.104510","journal-title":"Comput. Sec."},{"key":"14_CR19","doi-asserted-by":"publisher","DOI":"10.1016\/J.INFSOF.2023.107268","volume":"162","author":"Y Gong","year":"2023","unstructured":"Gong, Y., Liu, G., Xue, Y., Li, R., Meng, L.: A survey on dataset quality in machine learning. Inf. Softw. Technol. 162, 107268 (2023). https:\/\/doi.org\/10.1016\/J.INFSOF.2023.107268","journal-title":"Inf. Softw. Technol."},{"issue":"3","key":"14_CR20","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1007\/S10994-021-05946-3","volume":"110","author":"E H\u00fcllermeier","year":"2021","unstructured":"H\u00fcllermeier, E., Waegeman, W.: Aleatoric and epistemic uncertainty in machine learning: an introduction to concepts and methods. Mach. Learn. 110(3), 457\u2013506 (2021). https:\/\/doi.org\/10.1007\/S10994-021-05946-3","journal-title":"Mach. Learn."},{"issue":"2","key":"14_CR21","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1111\/j.2006.0030-1299.14714.x","volume":"113","author":"L Jost","year":"2006","unstructured":"Jost, L.: Entropy and diversity. Oikos 113(2), 363\u2013375 (2006)","journal-title":"Oikos"},{"key":"14_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/J.COSE.2020.102022","volume":"99","author":"A Kenyon","year":"2020","unstructured":"Kenyon, A., Deka, L., Elizondo, D.: Are public intrusion datasets fit for purpose characterising the state of the art in intrusion event datasets. Comput. Sec. 99, 102022 (2020). https:\/\/doi.org\/10.1016\/J.COSE.2020.102022","journal-title":"Comput. Sec."},{"key":"14_CR23","unstructured":"Kimura, M., Hino, H.: A short survey on importance weighting for machine learning. arXiv preprint arXiv:2403.10175 (2024)"},{"key":"14_CR24","doi-asserted-by":"publisher","unstructured":"Lanvin, M., Gimenez, P.F., Han, Y., Majorczyk, F., M\u00e9, L., Totel, E.: Errors in the cicids2017 dataset and the significant differences in detection performances it makes. In: International Conference on Risks and Security of Internet and Systems, pp. 18\u201333. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-31108-6_2","DOI":"10.1007\/978-3-031-31108-6_2"},{"key":"14_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/J.COSE.2020.101851","volume":"95","author":"X Li","year":"2020","unstructured":"Li, X., Chen, W., Zhang, Q., Wu, L.: Building auto-encoder intrusion detection system based on random forest feature selection. Comput. Sec. 95, 101851 (2020). https:\/\/doi.org\/10.1016\/J.COSE.2020.101851","journal-title":"Comput. Sec."},{"key":"14_CR26","doi-asserted-by":"publisher","unstructured":"Liu, L., Engelen, G., Lynar, T., Essam, D., Joosen, W.: Error prevalence in nids datasets: a case study on cic-ids-2017 and cse-cic-ids-2018. In: 2022 IEEE Conference on Communications and Network Security (CNS), pp. 254\u2013262. IEEE (2022). https:\/\/doi.org\/10.1109\/CNS56114.2022.9947235","DOI":"10.1109\/CNS56114.2022.9947235"},{"key":"14_CR27","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1016\/J.INS.2013.07.007","volume":"250","author":"V L\u00f3pez","year":"2013","unstructured":"L\u00f3pez, V., Fern\u00e1ndez, A., Garc\u00eda, S., Palade, V., Herrera, F.: An insight into classification with imbalanced data: empirical results and current trends on using data intrinsic characteristics. Inf. Sci. 250, 113\u2013141 (2013). https:\/\/doi.org\/10.1016\/J.INS.2013.07.007","journal-title":"Inf. Sci."},{"key":"14_CR28","unstructured":"Mitchell, M., et al.: Measuring data. arXiv preprint arXiv:2212.05129 (2022)"},{"issue":"1","key":"14_CR29","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1016\/J.PATCOG.2011.06.019","volume":"45","author":"JG Moreno-Torres","year":"2012","unstructured":"Moreno-Torres, J.G., Raeder, T., Alaiz-Rodr\u00edguez, R., Chawla, N.V., Herrera, F.: A unifying view on dataset shift in classification. Pattern Recogn. 45(1), 521\u2013530 (2012). https:\/\/doi.org\/10.1016\/J.PATCOG.2011.06.019","journal-title":"Pattern Recogn."},{"key":"14_CR30","unstructured":"Pendlebury, F., Pierazzi, F., Jordaney, R., Kinder, J., Cavallaro, L.: $$\\{$$TESSERACT$$\\}$$: eliminating experimental bias in malware classification across space and time. In: 28th USENIX security symposium (USENIX Security 2019), pp. 729\u2013746 (2019)"},{"key":"14_CR31","doi-asserted-by":"publisher","unstructured":"Pinto, D., Amorim, I., Maia, E., Pra\u00e7a, I.: A review on intrusion detection datasets: tools, processes, and features. Comput. Netw., 111177 (2025). https:\/\/doi.org\/10.1016\/J.COMNET.2025.111177","DOI":"10.1016\/J.COMNET.2025.111177"},{"key":"14_CR32","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1016\/J.COSE.2019.06.005","volume":"86","author":"M Ring","year":"2019","unstructured":"Ring, M., Wunderlich, S., Scheuring, D., Landes, D., Hotho, A.: A survey of network-based intrusion detection data sets. Comput. Sec. 86, 147\u2013167 (2019). https:\/\/doi.org\/10.1016\/J.COSE.2019.06.005","journal-title":"Comput. Sec."},{"issue":"2018","key":"14_CR33","doi-asserted-by":"publisher","first-page":"108","DOI":"10.5220\/0006639801080116","volume":"1","author":"I Sharafaldin","year":"2018","unstructured":"Sharafaldin, I., Lashkari, A.H., Ghorbani, A.A., et al.: Toward generating a new intrusion detection dataset and intrusion traffic characterization. ICISSp 1(2018), 108\u2013116 (2018). https:\/\/doi.org\/10.5220\/0006639801080116","journal-title":"ICISSp"},{"issue":"3\u201328","key":"14_CR34","first-page":"6","volume":"30","author":"A Storkey","year":"2009","unstructured":"Storkey, A., et al.: When training and test sets are different: characterizing learning transfer. Dataset Shift Mach. Learn. 30(3\u201328), 6 (2009)","journal-title":"Dataset Shift Mach. Learn."},{"issue":"4","key":"14_CR35","doi-asserted-by":"publisher","first-page":"853","DOI":"10.1007\/s00442-010-1812-0","volume":"164","author":"H Tuomisto","year":"2010","unstructured":"Tuomisto, H.: A consistent terminology for quantifying species diversity? yes, it does exist. Oecologia 164(4), 853\u2013860 (2010)","journal-title":"Oecologia"},{"key":"14_CR36","unstructured":"Wang, P., et al.: Diversity measurement and subset selection for instruction tuning datasets. arXiv preprint arXiv:2402.02318 (2024)"},{"key":"14_CR37","doi-asserted-by":"publisher","unstructured":"Wasielewska, K., Soukup, D., \u010cejka, T., Camacho, J.: Evaluation of the limit of detection in network dataset quality assessment with perqoda. In: Joint European Conference on Machine Learning and Knowledge Discovery in Databases, pp. 170\u2013185. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-23633-4_13","DOI":"10.1007\/978-3-031-23633-4_13"},{"key":"14_CR38","unstructured":"Wu, S., Lu, K., Xu, B., Lin, J., Su, Q., Zhou, C.: Self-evolved diverse data sampling for efficient instruction tuning. arXiv preprint arXiv:2311.08182 (2023)"},{"key":"14_CR39","unstructured":"Zhao, D., Andrews, J., Papakyriakopoulos, O., Xiang, A.: Position: Measure dataset diversity, don\u2019t just claim it. In: Forty-first International Conference on Machine Learning (2024). https:\/\/openreview.net\/forum?id=jsKr6RVDDs"}],"container-title":["Lecture Notes in Computer Science","Computer Security \u2013 ESORICS 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-07884-1_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T16:22:17Z","timestamp":1760286137000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-07884-1_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,13]]},"ISBN":["9783032078834","9783032078841"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-07884-1_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,13]]},"assertion":[{"value":"13 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ESORICS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Symposium on Research in Computer Security","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Toulouse","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"esorics2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.esorics2025.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}