{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:12:55Z","timestamp":1755839575799,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,29]],"date-time":"2023-08-29T00:00:00Z","timestamp":1693267200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,29]]},"DOI":"10.1145\/3600160.3600168","type":"proceedings-article","created":{"date-parts":[[2023,8,9]],"date-time":"2023-08-09T22:54:41Z","timestamp":1691621681000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["A hybrid anonymization pipeline to improve the privacy-utility balance in sensitive datasets for ML purposes"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3448-1554","authenticated-orcid":false,"given":"Jenno","family":"Verdonck","sequence":"first","affiliation":[{"name":"KU Leuven - DistriNet, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7143-8742","authenticated-orcid":false,"given":"Kevin","family":"De Boeck","sequence":"additional","affiliation":[{"name":"KU Leuven - DistriNet, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0225-9705","authenticated-orcid":false,"given":"Michiel","family":"Willocx","sequence":"additional","affiliation":[{"name":"KU Leuven - DistriNet, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1863-1172","authenticated-orcid":false,"given":"Jorn","family":"Lapon","sequence":"additional","affiliation":[{"name":"KU Leuven - DistriNet, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9255-4902","authenticated-orcid":false,"given":"Vincent","family":"Naessens","sequence":"additional","affiliation":[{"name":"KU Leuven - DistriNet, Belgium"}]}],"member":"320","published-online":{"date-parts":[[2023,8,29]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/NTMS.2016.7792481"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33167-1_18"},{"key":"e_1_3_2_1_3_1","series-title":"July 2012","volume-title":"The\u2019re-identification\u2019of Governor William Weld\u2019s medical information: a critical re-examination of health data identification risks and privacy protections, then and now. Then and Now","author":"Barth-Jones Daniel","year":"2012","unstructured":"Daniel Barth-Jones. 2012. The\u2019re-identification\u2019of Governor William Weld\u2019s medical information: a critical re-examination of health data identification risks and privacy protections, then and now. Then and Now (July 2012) (2012)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.3233\/SJU-2001-18412"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1515\/popets-2018-0004"},{"key":"e_1_3_2_1_6_1","volume-title":"The Compromise of Data Privacy in Predictive Performance. In International Symposium on Intelligent Data Analysis. Springer, 426\u2013438","author":"Carvalho T\u00e2nia","year":"2021","unstructured":"T\u00e2nia Carvalho and Nuno Moniz. 2021. The Compromise of Data Privacy in Predictive Performance. In International Symposium on Intelligent Data Analysis. Springer, 426\u2013438."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.pmcj.2019.01.003"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/11818175_12"},{"key":"e_1_3_2_1_9_1","volume-title":"Privacy by design in big data: an overview of privacy enhancing technologies in the era of big data analytics. arXiv preprint arXiv:1512.06000","author":"D\u2019Acquisto Giuseppe","year":"2015","unstructured":"Giuseppe D\u2019Acquisto, Josep Domingo-Ferrer, Panayiotis Kikiras, Vicen\u00e7 Torra, Yves-Alexandre de Montjoye, and Athena Bourka. 2015. Privacy by design in big data: an overview of privacy enhancing technologies in the era of big data analytics. arXiv preprint arXiv:1512.06000 (2015)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCSIC54682.2021.00045"},{"key":"e_1_3_2_1_11_1","volume-title":"Retiring Adult: New Datasets for Fair Machine Learning. Advances in Neural Information Processing Systems 34","author":"Ding Frances","year":"2021","unstructured":"Frances Ding, Moritz Hardt, John Miller, and Ludwig Schmidt. 2021. Retiring Adult: New Datasets for Fair Machine Learning. Advances in Neural Information Processing Systems 34 (2021)."},{"volume-title":"Guide to the de-identification of personal health information","author":"El\u00a0Emam Khaled","key":"e_1_3_2_1_12_1","unstructured":"Khaled El\u00a0Emam. 2013. Guide to the de-identification of personal health information. CRC Press."},{"volume-title":"Privacy by design in big data. https:\/\/www.enisa.europa.eu\/publications\/big-data-protection. [Online","year":"2023","key":"e_1_3_2_1_13_1","unstructured":"enisa. 2015. Privacy by design in big data. https:\/\/www.enisa.europa.eu\/publications\/big-data-protection. [Online; accessed 20-Jan-2023]."},{"key":"e_1_3_2_1_14_1","volume-title":"https:\/\/eur-lex.europa.eu\/eli\/reg\/2016\/679\/oj. [Online","author":"EUR-Lex. 2016. Regulation (EU) 2016\/679 of the European Parliament and of the Council of 27 April 2016.","year":"2022","unstructured":"EUR-Lex. 2016. Regulation (EU) 2016\/679 of the European Parliament and of the Council of 27 April 2016. https:\/\/eur-lex.europa.eu\/eli\/reg\/2016\/679\/oj. [Online; accessed 20-Oct-2022]."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1002\/jmv.25750"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2014.06.002"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tele.2021.101564"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2019.05.011"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2009.19"},{"key":"e_1_3_2_1_20_1","volume-title":"Design and evaluation of a data anonymization pipeline to promote Open Science on COVID-19. Scientific data 7, 1","author":"Jakob EM","year":"2020","unstructured":"Carolin\u00a0EM Jakob, Florian Kohlmayer, Thierry Meurers, J\u00f6rg\u00a0Janne Vehreschild, and Fabian Prasser. 2020. Design and evaluation of a data anonymization pipeline to promote Open Science on COVID-19. Scientific data 7, 1 (2020), 1\u201310."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/s40747-021-00637-x"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0271260"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.22381\/emfm17120224"},{"key":"e_1_3_2_1_24_1","volume-title":"2007 IEEE 23rd international conference on data engineering. IEEE, 106\u2013115","author":"Li Ninghui","year":"2006","unstructured":"Ninghui Li, Tiancheng Li, and Suresh Venkatasubramanian. 2006. t-closeness: Privacy beyond k-anonymity and l-diversity. In 2007 IEEE 23rd international conference on data engineering. IEEE, 106\u2013115."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2414456.2414474"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1557019.1557079"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1217299.1217302"},{"key":"e_1_3_2_1_28_1","volume-title":"Reidentification of individuals in Chicago\u2019s homicide database: A technical and legal study","author":"Ochoa Salvador","year":"2001","unstructured":"Salvador Ochoa, Jamie Rasmussen, Christine Robson, and Michael Salib. 2001. Reidentification of individuals in Chicago\u2019s homicide database: A technical and legal study. Massachusetts Institute of Technology (2001)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1002\/spe.2812"},{"key":"e_1_3_2_1_31_1","first-page":"172","article-title":"A study on k-anonymity, l-diversity, and t-closeness techniques","volume":"17","author":"Rajendran Keerthana","year":"2017","unstructured":"Keerthana Rajendran, Manoj Jayabalan, and Muhammad\u00a0Ehsan Rana. 2017. A study on k-anonymity, l-diversity, and t-closeness techniques. IJCSNS 17, 12 (2017), 172.","journal-title":"IJCSNS"},{"key":"e_1_3_2_1_32_1","volume-title":"Estimating the success of re-identifications in incomplete datasets using generative models. Nature communications 10, 1","author":"Rocher Luc","year":"2019","unstructured":"Luc Rocher, Julien\u00a0M Hendrickx, and Yves-Alexandre De\u00a0Montjoye. 2019. Estimating the success of re-identifications in incomplete datasets using generative models. Nature communications 10, 1 (2019), 1\u20139."},{"key":"e_1_3_2_1_33_1","volume-title":"Does k -Anonymous microaggregation affect machine-learned macrotrends?IEEE access 6","author":"Rodr\u00edguez-Hoyos Ana","year":"2018","unstructured":"Ana Rodr\u00edguez-Hoyos, Jos\u00e9 Estrada-Jim\u00e9nez, David Rebollo-Monedero, Javier Parra-Arnau, and Jordi Forn\u00e9. 2018. Does k -Anonymous microaggregation affect machine-learned macrotrends?IEEE access 6 (2018), 28258\u201328277."},{"key":"e_1_3_2_1_34_1","unstructured":"Pierangela Samarati and Latanya Sweeney. 1998. Protecting privacy when disclosing information: k-anonymity and its enforcement through generalization and suppression. (1998)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2021.102488"},{"key":"e_1_3_2_1_36_1","volume-title":"Uniqueness of simple demographics in the US population. LIDAP-WP4","author":"Sweeney Latanya","year":"2000","unstructured":"Latanya Sweeney. 2000. Uniqueness of simple demographics in the US population. LIDAP-WP4, 2000 (2000)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1142\/S0218488502001648"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289430.3289435"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2018.23211"},{"key":"e_1_3_2_1_40_1","volume-title":"Effects of Training Data Size and Class Imbalance on the Performance of Classifiers. In Conference on Artificial Intelligence and Natural Language. Springer, 3\u201317","author":"Zheng Wanwan","year":"2019","unstructured":"Wanwan Zheng and Mingzhe Jin. 2019. Effects of Training Data Size and Class Imbalance on the Performance of Classifiers. In Conference on Artificial Intelligence and Natural Language. Springer, 3\u201317."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2980235"}],"event":{"name":"ARES 2023: The 18th International Conference on Availability, Reliability and Security","acronym":"ARES 2023","location":"Benevento Italy"},"container-title":["Proceedings of the 18th International Conference on Availability, Reliability and Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3600160.3600168","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3600160.3600168","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:13Z","timestamp":1750178173000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3600160.3600168"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,29]]},"references-count":41,"alternative-id":["10.1145\/3600160.3600168","10.1145\/3600160"],"URL":"https:\/\/doi.org\/10.1145\/3600160.3600168","relation":{},"subject":[],"published":{"date-parts":[[2023,8,29]]},"assertion":[{"value":"2023-08-29","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}