{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:47:48Z","timestamp":1755794868926,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Hellenic Foundation for Research and Innovation (H.F.R.I.)","award":["1941"],"award-info":[{"award-number":["1941"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100018693","name":"HORIZON EUROPE Framework Programme","doi-asserted-by":"publisher","award":["101135775"],"award-info":[{"award-number":["101135775"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100018693","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100000781","name":"European Research Council","doi-asserted-by":"publisher","award":["617393"],"award-info":[{"award-number":["617393"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100000781","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709285","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:42:22Z","timestamp":1743792142000},"page":"1068-1079","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Data Glitches Discovery using Influence-based Model Explanations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9575-087X","authenticated-orcid":false,"given":"Nikolaos","family":"Myrtakis","sequence":"first","affiliation":[{"name":"ETIS Lab, ENSEA, Cergy, France &amp; University of Crete, Crete, Greece"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2492-959X","authenticated-orcid":false,"given":"Ioannis","family":"Tsamardinos","sequence":"additional","affiliation":[{"name":"University of Crete, Crete, Greece"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2076-1881","authenticated-orcid":false,"given":"Vassilis","family":"Christophides","sequence":"additional","affiliation":[{"name":"ETIS Lab, ENSEA, Cergy, France"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR","author":"Barshan Elnaz","year":"2020","unstructured":"Elnaz Barshan, Marc-Etienne Brunet, and Gintare Karolina Dziugaite. 2020. Relatif: Identifying explanatory training samples via relative influence. In International Conference on Artificial Intelligence and Statistics. PMLR, 1899--1909."},{"key":"e_1_3_2_2_2_1","volume-title":"Influence Functions in Deep Learning Are Fragile. In ICLR","author":"Basu Samyadeep","year":"2021","unstructured":"Samyadeep Basu, Phillip Pope, and Soheil Feizi. [n.d.]. Influence Functions in Deep Learning Are Fragile. In ICLR 2021."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1049\/ip-vis:19941330"},{"key":"e_1_3_2_2_4_1","volume-title":"Data Management for Machine Learning: A Survey","author":"Chai Chengliang","year":"2022","unstructured":"Chengliang Chai, Jiayi Wang, Yuyu Luo, Zeping Niu, and Guoliang Li. 2022. Data Management for Machine Learning: A Survey. IEEE Transactions on Knowledge and Data Engineering (2022), 1--1."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i8.16871"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01446"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.2307\/1271434"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2018.03.008"},{"key":"e_1_3_2_2_9_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021."},{"key":"e_1_3_2_2_10_1","volume-title":"NeurIPS ML Safety Workshop.","author":"Farquhar Sebastian","year":"2022","unstructured":"Sebastian Farquhar and Yarin Gal. 2022. What'Out-of-distribution'Is and Is Not. In NeurIPS ML Safety Workshop."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2013.2292894"},{"key":"e_1_3_2_2_12_1","volume-title":"Mislabeled examples detection viewed as probing machine learning models: concepts, survey and extensive benchmark. arXiv preprint arXiv:2410.15772","author":"George Thomas","year":"2024","unstructured":"Thomas George, Pierre Nodet, Alexis Bondu, and Vincent Lemaire. 2024. Mislabeled examples detection viewed as probing machine learning models: concepts, survey and extensive benchmark. arXiv preprint arXiv:2410.15772 (2024)."},{"key":"e_1_3_2_2_13_1","unstructured":"Amirata Ghorbani and James Zou. 2019. Data shapley: Equitable valuation of data for machine learning. In ICML. PMLR 2242--2251."},{"key":"e_1_3_2_2_14_1","first-page":"18932","article-title":"Revisiting deep learning models for tabular data","volume":"34","author":"Gorishniy Yury","year":"2021","unstructured":"Yury Gorishniy, Ivan Rubachev, Valentin Khrulkov, and Artem Babenko. 2021. Revisiting deep learning models for tabular data. Advances in Neural Information Processing Systems, Vol. 34 (2021), 18932--18943.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_15_1","volume-title":"Ruhi Sharma Mittal, and Vitobha Munigala","author":"Gupta Nitin","year":"2021","unstructured":"Nitin Gupta, Shashank Mujumdar, Hima Patel, Satoshi Masuda, Naveen Panwar, Sambaran Bandyopadhyay, Sameep Mehta, Shanmukha Guttula, Shazia Afzal, Ruhi Sharma Mittal, and Vitobha Munigala. 2021. Data Quality for Machine Learning Tasks. In KDD. New York, NY, USA, 4040--4041."},{"key":"e_1_3_2_2_16_1","volume-title":"Training data influence analysis and estimation: A survey. arXiv preprint arXiv:2212.04612","author":"Hammoudeh Zayd","year":"2022","unstructured":"Zayd Hammoudeh and Daniel Lowd. 2022. Training data influence analysis and estimation: A survey. arXiv preprint arXiv:2212.04612 (2022)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1974.10482962"},{"key":"e_1_3_2_2_18_1","volume-title":"A survey of label-noise representation learning: Past, present and future. arXiv preprint arXiv:2011.04406","author":"Han Bo","year":"2020","unstructured":"Bo Han, Quanming Yao, Tongliang Liu, Gang Niu, Ivor W Tsang, James T Kwok, and Masashi Sugiyama. 2020. A survey of label-noise representation learning: Past, present and future. arXiv preprint arXiv:2011.04406 (2020)."},{"key":"e_1_3_2_2_19_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Hara Satoshi","year":"2019","unstructured":"Satoshi Hara, Atsushi Nitanda, and Takanori Maehara. 2019. Data cleansing for models trained with SGD. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_21_1","volume-title":"Discovering cluster-based local outliers. Pattern recognition letters","author":"He Zengyou","year":"2003","unstructured":"Zengyou He, Xiaofei Xu, and Shengchun Deng. 2003. Discovering cluster-based local outliers. Pattern recognition letters, Vol. 24, 9--10 (2003), 1641--1650."},{"key":"e_1_3_2_2_22_1","volume-title":"Benchmarking neural network robustness to common corruptions and surface variations. arXiv preprint arXiv:1807.01697","author":"Hendrycks Dan","year":"2018","unstructured":"Dan Hendrycks and Thomas G Dietterich. 2018. Benchmarking neural network robustness to common corruptions and surface variations. arXiv preprint arXiv:1807.01697 (2018)."},{"key":"e_1_3_2_2_23_1","volume-title":"The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 1167--1176","author":"Jia Ruoxi","year":"2019","unstructured":"Ruoxi Jia, David Dao, Boxin Wang, Frances Ann Hubis, Nick Hynes, Nezihe Merve G\u00fcrel, Bo Li, Ce Zhang, Dawn Song, and Costas J Spanos. 2019. Towards efficient data valuation based on the shapley value. In The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 1167--1176."},{"key":"e_1_3_2_2_24_1","volume-title":"International conference on machine learning. PMLR","author":"Koh Pang Wei","year":"2017","unstructured":"Pang Wei Koh and Percy Liang. 2017. Understanding black-box predictions via influence functions. In International conference on machine learning. PMLR, 1885--1894."},{"key":"e_1_3_2_2_25_1","volume-title":"On the accuracy of influence functions for measuring group effects. Advances in neural information processing systems","author":"Koh Pang Wei W","year":"2019","unstructured":"Pang Wei W Koh, Kai-Siang Ang, Hubert Teo, and Percy S Liang. 2019. On the accuracy of influence functions for measuring group effects. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_2_26_1","volume-title":"International Conference on Learning Representations.","author":"Kong Shuming","year":"2021","unstructured":"Shuming Kong, Yanyan Shen, and Linpeng Huang. 2021. Resolving training biases via influence-based data relabeling. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_27_1","first-page":"2400","article-title":"Understanding instance-based interpretability of variational auto-encoders","volume":"34","author":"Kong Zhifeng","year":"2021","unstructured":"Zhifeng Kong and Kamalika Chaudhuri. 2021. Understanding instance-based interpretability of variational auto-encoders. Advances in Neural Information Processing Systems, Vol. 34 (2021), 2400--2412.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_28_1","volume-title":"CleanML: A Study for Evaluating the Impact of Data Cleaning on ML Classification Tasks. 2021 IEEE 37th International Conference on Data Engineering (ICDE)","author":"Li Peng","year":"2021","unstructured":"Peng Li, Xi Rao, Jennifer Blase, Yue Zhang, Xu Chu, and Ce Zhang. 2021. CleanML: A Study for Evaluating the Impact of Data Cleaning on ML Classification Tasks. 2021 IEEE 37th International Conference on Data Engineering (ICDE) (2021), 13--24."},{"key":"e_1_3_2_2_29_1","volume-title":"Webvision database: Visual learning and understanding from web data. arXiv preprint arXiv:1708.02862","author":"Li Wen","year":"2017","unstructured":"Wen Li, Limin Wang, Wei Li, Eirikur Agustsson, and Luc Van Gool. 2017. Webvision database: Visual learning and understanding from web data. arXiv preprint arXiv:1708.02862 (2017)."},{"key":"e_1_3_2_2_30_1","volume-title":"Kai Ming Ting, and Zhi-Hua Zhou","author":"Liu Fei Tony","year":"2008","unstructured":"Fei Tony Liu, Kai Ming Ting, and Zhi-Hua Zhou. 2008. Isolation forest. In 2008 eighth ieee international conference on data mining. IEEE, 413--422."},{"key":"e_1_3_2_2_31_1","volume-title":"Towards out-of-distribution generalization: A survey. arXiv preprint arXiv:2108.13624","author":"Liu Jiashuo","year":"2021","unstructured":"Jiashuo Liu, Zheyan Shen, Yue He, Xingxuan Zhang, Renzhe Xu, Han Yu, and Peng Cui. 2021. Towards out-of-distribution generalization: A survey. arXiv preprint arXiv:2108.13624 (2021)."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btl346"},{"key":"e_1_3_2_2_34_1","volume-title":"Mnist-c: A robustness benchmark for computer vision. arXiv preprint arXiv:1906.02337","author":"Mu Norman","year":"2019","unstructured":"Norman Mu and Justin Gilmer. 2019. Mnist-c: A robustness benchmark for computer vision. arXiv preprint arXiv:1906.02337 (2019)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3083060"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13222-022-00413-2"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-short.104"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12125"},{"key":"e_1_3_2_2_39_1","first-page":"35645","article-title":"Robustness to Label Noise Depends on the Shape of the Noise Distribution","volume":"35","author":"Oyen Diane","year":"2022","unstructured":"Diane Oyen, Michal Kucer, Nicolas Hengartner, and Har Simrat Singh. 2022. Robustness to Label Noise Depends on the Shape of the Noise Distribution. Advances in Neural Information Processing Systems, Vol. 35 (2022), 35645--35656.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_40_1","volume-title":"Running experiments on amazon mechanical turk. Judgment and Decision making","author":"Paolacci Gabriele","year":"2010","unstructured":"Gabriele Paolacci, Jesse Chandler, and Panagiotis G Ipeirotis. 2010. Running experiments on amazon mechanical turk. Judgment and Decision making, Vol. 5, 5 (2010), 411--419."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CBMS.2006.65"},{"key":"e_1_3_2_2_42_1","first-page":"19920","article-title":"Estimating training data influence by tracing gradient descent","volume":"33","author":"Pruthi Garima","year":"2020","unstructured":"Garima Pruthi, Frederick Liu, Satyen Kale, and Mukund Sundararajan. 2020. Estimating training data influence by tracing gradient descent. Advances in Neural Information Processing Systems, Vol. 33 (2020), 19920--19930.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_43_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning","volume":"80","author":"Ruff Lukas","year":"2018","unstructured":"Lukas Ruff, Robert Vandermeulen, Nico Goernitz, Lucas Deecke, Shoaib Ahmed Siddiqui, Alexander Binder, Emmanuel M\u00fcller, and Marius Kloft. 2018. Deep One-Class Classification. In Proceedings of the 35th International Conference on Machine Learning, Vol. 80. 4393--4402."},{"key":"e_1_3_2_2_44_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Schioppa Andrea","year":"2024","unstructured":"Andrea Schioppa, Katja Filippova, Ivan Titov, and Polina Zablotskaia. 2024. Theoretical and practical perspectives on what influence functions do. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20791"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMSNETS53615.2022.9668557"},{"key":"e_1_3_2_2_47_1","volume-title":"International Conference on Machine Learning. PMLR, 5907--5915","author":"Song Hwanjun","year":"2019","unstructured":"Hwanjun Song, Minseok Kim, and Jae-Gil Lee. 2019. Selfie: Refurbishing unclean samples for robust deep learning. In International Conference on Machine Learning. PMLR, 5907--5915."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3152527"},{"key":"e_1_3_2_2_49_1","volume-title":"Representer point selection via local jacobian expansion for post-hoc classifier explanation of deep neural networks and ensemble models. Advances in neural information processing systems","author":"Sui Yi","year":"2021","unstructured":"Yi Sui, Ga Wu, and Scott Sanner. 2021. Representer point selection via local jacobian expansion for post-hoc classifier explanation of deep neural networks and ensemble models. Advances in neural information processing systems, Vol. 34 (2021), 23347--23358."},{"key":"e_1_3_2_2_50_1","first-page":"12966","article-title":"Interactive label cleaning with example-based explanations","volume":"34","author":"Teso Stefano","year":"2021","unstructured":"Stefano Teso, Andrea Bontempelli, Fausto Giunchiglia, and Andrea Passerini. 2021. Interactive label cleaning with example-based explanations. Advances in Neural Information Processing Systems, Vol. 34 (2021), 12966--12977.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN55064.2022.9892058"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3551636"},{"key":"e_1_3_2_2_53_1","volume-title":"CoRR","volume":"2112","author":"Whang Steven Euijong","year":"2021","unstructured":"Steven Euijong Whang, Yuji Roh, Hwanjun Song, and Jae-Gil Lee. 2021. Data Collection and Quality Challenges in Deep Learning: A Data-Centric AI Perspective. CoRR, Vol. 2112.06409 (2021)."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2023.3270293"},{"key":"e_1_3_2_2_55_1","first-page":"32598","article-title":"Openood: Benchmarking generalized out-of-distribution detection","volume":"35","author":"Yang Jingkang","year":"2022","unstructured":"Jingkang Yang, Pengyun Wang, Dejian Zou, Zitang Zhou, Kunyuan Ding, Wenxuan Peng, Haoqi Wang, Guangyao Chen, Bo Li, Yiyou Sun, et al. 2022. Openood: Benchmarking generalized out-of-distribution detection. Advances in Neural Information Processing Systems, Vol. 35 (2022), 32598--32611.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_56_1","volume-title":"Ian En-Hsu Yen, and Pradeep K Ravikumar","author":"Yeh Chih-Kuan","year":"2018","unstructured":"Chih-Kuan Yeh, Joon Kim, Ian En-Hsu Yen, and Pradeep K Ravikumar. 2018. Representer point selection for explaining deep neural networks. NeurIPS, Vol. 31 (2018)."},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3446776"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11610"},{"key":"e_1_3_2_2_59_1","article-title":"PyOD: A Python Toolbox for Scalable Outlier Detection","volume":"20","author":"Zhao Yue","year":"2019","unstructured":"Yue Zhao, Zain Nasrullah, and Zheng Li. 2019. PyOD: A Python Toolbox for Scalable Outlier Detection. J. Mach. Learn. Res., Vol. 20 (2019), 96:1--96:7.","journal-title":"J. Mach. Learn. Res."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709285","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709285","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:39:28Z","timestamp":1755358768000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709285"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":59,"alternative-id":["10.1145\/3690624.3709285","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709285","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}