{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T00:32:42Z","timestamp":1742949162024,"version":"3.40.3"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031434112"},{"type":"electronic","value":"9783031434129"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43412-9_14","type":"book-chapter","created":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T20:28:38Z","timestamp":1694896118000},"page":"237-253","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning with Noisy Labels by Adaptive Gradient-Based Outlier Removal"],"prefix":"10.1007","author":[{"given":"Anastasiia","family":"Sedova","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lena","family":"Zellinger","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Benjamin","family":"Roth","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,9,17]]},"reference":[{"issue":"2","key":"14_CR1","first-page":"310","volume":"28","author":"MB Al-Zoubi","year":"2009","unstructured":"Al-Zoubi, M.B.: An effective clustering-based approach for outlier detection. Eur. J. Sci. Res. 28(2), 310\u2013316 (2009)","journal-title":"Eur. J. Sci. Res."},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Alberto, T.C., Lochter, J.V., Almeida, T.A.: Tubespam: comment spam filtering on youtube. In: 2015 IEEE 14th International Conference on Machine Learning and Applications (ICMLA), pp. 138\u2013143 (2015)","DOI":"10.1109\/ICMLA.2015.37"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Almeida, T.A., Hidalgo, J.M.G., Yamakami, A.: Contributions to the study of sms spam filtering: new collection and results. In: Proceedings of the 11th ACM Symposium on Document Engineering, pp. 259\u2013262 (2011)","DOI":"10.1145\/2034691.2034742"},{"key":"14_CR4","unstructured":"Arazo, E., Ortego, D., Albert, P., O\u2019Connor, N.E., McGuinness, K.: Unsupervised label noise modeling and loss correction. In: Chaudhuri, K., Salakhutdinov, R. (eds.) Proceedings of the 36th International Conference on Machine Learning, ICML 2019 (2019)"},{"key":"14_CR5","unstructured":"Awasthi, A., Ghosh, S., Goyal, R., Sarawagi, S.: Learning from rules generalizing labeled exemplars. In: 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, 26\u201330 April (2020)"},{"key":"14_CR6","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1016\/j.neucom.2015.05.135","volume":"181","author":"M Bai","year":"2016","unstructured":"Bai, M., Wang, X., Xin, J., Wang, G.: An efficient algorithm for distributed density-based outlier detection on big data. Neurocomputing 181, 19\u201328 (2016)","journal-title":"Neurocomputing"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"B\u00e9n\u00e9dict, G., Koops, H.V., Odijk, D., de Rijke, M.: Sigmoidf1: a smooth f1 score surrogate loss for multilabel classification. Trans. Mach. Learn. Res. (2022)","DOI":"10.1145\/3606375"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Breunig, M.M., Kriegel, H.P., Ng, R.T., Sander, J.: Lof: identifying density-based local outliers. In: Proceedings of the 2000 ACM SIGMOD International Conference on Management of Data, pp. 93\u2013104 (2000)","DOI":"10.1145\/342009.335388"},{"key":"14_CR9","unstructured":"Chen, P., Liao, B.B., Chen, G., Zhang, S.: Understanding and utilizing deep neural networks trained with noisy labels. In: International Conference on Machine Learning, pp. 1062\u20131070 (2019)"},{"key":"14_CR10","unstructured":"Cheng, H., Zhu, Z., Li, X., Gong, Y., Sun, X., Liu, Y.: Learning with instance-dependent label noise: a sample sieve approach. arXiv preprint arXiv:2010.02347 (2020)"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Elahi, M., Li, K., Nisar, W., Lv, X., Wang, H.: Efficient clustering-based outlier detection algorithm for dynamic data stream. In: 2008 Fifth International Conference on Fuzzy Systems and Knowledge Discovery, vol. 5. IEEE (2008)","DOI":"10.1109\/FSKD.2008.374"},{"key":"14_CR12","unstructured":"Fang, Z., Kong, S., Wang, Z., Fowlkes, C.C., Yang, Y.: Weak supervision and referring attention for temporal-textual association learning. CoRR abs\/ arXiv: 2006.11747 (2020)"},{"issue":"5","key":"14_CR13","doi-asserted-by":"publisher","first-page":"845","DOI":"10.1109\/TNNLS.2013.2292894","volume":"25","author":"B Fr\u00e9nay","year":"2014","unstructured":"Fr\u00e9nay, B., Verleysen, M.: Classification in the presence of label noise: a survey. IEEE Trans. Neural Netw. Learn. Syst. 25(5), 845\u2013869 (2014)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"14_CR14","unstructured":"Fu, D., Chen, M., Sala, F., Hooper, S., Fatahalian, K., Re, C.: Fast and three-rious: speeding up weak supervision with triplet methods. In: III, H.D., Singh, A. (eds.) Proceedings of the 37th International Conference on Machine Learning, 13\u201318 Jul, vol. 119, pp. 3280\u20133291 (2020)"},{"key":"14_CR15","doi-asserted-by":"crossref","unstructured":"Ghoting, A., Parthasarathy, S., Otey, M.E.: Fast mining of distance-based outliers in high-dimensional datasets. In: Data Mining and Knowledge Discovery, vol. 16 (2008)","DOI":"10.1007\/s10618-008-0093-2"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Giacomello, E., Lanzi, P.L., Loiacono, D., Nassano, L.: Image embedding and model ensembling for automated chest x-ray interpretation. In: 2021 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20138. IEEE (2021)","DOI":"10.1109\/IJCNN52387.2021.9534378"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition, pp. 770\u2013778 (06 2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Hedderich, M.A., Adelani, D.I., Zhu, D., Alabi, J.O., Markus, U., Klakow, D.: Transfer learning and distant supervision for multilingual transformer models: A study on african languages. In: Webber, B., Cohn, T., He, Y., Liu, Y. (eds.) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, EMNLP 2020, Online, 16\u201320 November 2020, pp. 2580\u20132591 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.204"},{"key":"14_CR19","unstructured":"Hedderich, M.A., Lange, L., Klakow, D.: ANEA: distant supervision for low-resource named entity recognition. arXiv: 2102.13129 (2021)"},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"Huang, J., Qu, L., Jia, R., Zhao, B.: O2u-net: A simple noisy label detection approach for deep neural networks. In: 2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019, Seoul, Korea (South), 27 October - 2 November 2019, pp. 3325\u20133333 (2019)","DOI":"10.1109\/ICCV.2019.00342"},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Irvin, J., et al.: Chexpert: A large chest radiograph dataset with uncertainty labels and expert comparison. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 590\u2013597 (2019)","DOI":"10.1609\/aaai.v33i01.3301590"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Karamanolakis, G., Mukherjee, S., Zheng, G., Awadallah, A.H.: Self-training with weak supervision. In: Toutanova, K., Rumshisky, A., Zettlemoyer, L., Hakkani-T\u00fcr, D., Beltagy, I., Bethard, S., Cotterell, R., Chakraborty, T., Zhou, Y. (eds.) Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2021, Online, 6\u201311 June 2021, pp. 845\u2013863 (2021)","DOI":"10.18653\/v1\/2021.naacl-main.66"},{"key":"14_CR23","unstructured":"Knox, E.M., Ng, R.T.: Algorithms for mining distancebased outliers in large datasets. In: Proceedings of the International Conference on Very Large Data Bases, pp. 392\u2013403. Citeseer (1998)"},{"key":"14_CR24","unstructured":"Krizhevsky, A.: Learning multiple layers of features from tiny images (2009)"},{"key":"14_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1007\/978-3-030-87237-3_30","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2021","author":"J Li","year":"2021","unstructured":"Li, J., et al.: Hybrid supervision learning for pathology whole slide image classification. In: de Bruijne, M., et al. (eds.) MICCAI 2021. LNCS, vol. 12908, pp. 309\u2013318. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-87237-3_30"},{"key":"14_CR26","unstructured":"Li, J., Socher, R., Hoi, S.C.: Dividemix: Learning with noisy labels as semi-supervised learning. In: ICLR (2020)"},{"key":"14_CR27","doi-asserted-by":"crossref","unstructured":"Li, J., Wong, Y., Zhao, Q., Kankanhalli, M.S.: Learning to learn from noisy labeled data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5051\u20135059 (2019)","DOI":"10.1109\/CVPR.2019.00519"},{"key":"14_CR28","doi-asserted-by":"crossref","unstructured":"Li, X., Roth, D.: Learning question classifiers. In: COLING 2002: The 19th International Conference on Computational Linguistics (2002)","DOI":"10.3115\/1072228.1072378"},{"key":"14_CR29","doi-asserted-by":"crossref","unstructured":"Li, Y., Yang, J., Song, Y., Cao, L., Luo, J., Li, L.J.: Learning from noisy labels with distillation. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 1928\u20131936 (2017)","DOI":"10.1109\/ICCV.2017.211"},{"key":"14_CR30","unstructured":"Lipton, Z.C., Wang, Y., Smola, A.J.: Detecting and correcting for label shift with black box predictors. In: Dy, J.G., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning, ICML 2018, vol. 80 (2018)"},{"key":"14_CR31","unstructured":"Liu, Z., et al.: Learning not to learn in the presence of noisy labels. CoRR abs\/ arXiv: 2002.06541 (2020)"},{"key":"14_CR32","doi-asserted-by":"publisher","first-page":"1373","DOI":"10.1613\/jair.1.12125","volume":"70","author":"C Northcutt","year":"2021","unstructured":"Northcutt, C., Jiang, L., Chuang, I.: Confident learning: estimating uncertainty in dataset labels. J. Artifi. Intell. Res. 70, 1373\u20131411 (2021)","journal-title":"J. Artifi. Intell. Res."},{"key":"14_CR33","doi-asserted-by":"crossref","unstructured":"Ratner, A., Bach, S.H., Ehrenberg, H., Fries, J., Wu, S., R\u00e9, C.: Snorkel: rapid training data creation with weak supervision. VLDB J. (2) (2020)","DOI":"10.1007\/s00778-019-00552-1"},{"key":"14_CR34","doi-asserted-by":"crossref","unstructured":"Ratner, A., Hancock, B., Dunnmon, J., Sala, F., Pandey, S., R\u00e9, C.: Training complex models with multi-task weak supervision. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, 4763\u20134771 (July 2019)","DOI":"10.1609\/aaai.v33i01.33014763"},{"key":"14_CR35","unstructured":"Ratner, A.J., De Sa, C.M., Wu, S., Selsam, D., R\u00e9, C.: Data programming: creating large training sets, quickly. In: Advances in Neural Information Processing Systems (2016)"},{"key":"14_CR36","doi-asserted-by":"crossref","unstructured":"Raykar, V.C., Yu, S.: Eliminating spammers and ranking annotators for crowdsourced labeling tasks. J. Mach. Learn. Res. 13(16) (2012)","DOI":"10.1109\/NCVPRIPG.2011.14"},{"key":"14_CR37","doi-asserted-by":"crossref","unstructured":"Ren, W., Li, Y., Su, H., Kartchner, D., Mitchell, C., Zhang, C.: Denoising multi-source weak supervision for neural text classification. In: Cohn, T., He, Y., Liu, Y. (eds.) Findings of the Association for Computational Linguistics: EMNLP 2020, Online Event, 16\u201320 November 2020, vol. EMNLP 2020, pp. 3739\u20133754 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.334"},{"issue":"3","key":"14_CR38","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vision 115(3), 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"14_CR39","unstructured":"Shi, Y., et al.: Gradient matching for domain generalization. arXiv preprint arXiv:2104.09937 (2021)"},{"key":"14_CR40","doi-asserted-by":"crossref","unstructured":"Stephan, A., Kougia, V., Roth, B.: SepLL: separating latent class labels from weak supervision noise. In: Findings of the Association for Computational Linguistics: EMNLP 2022. Association for Computational Linguistics, Abu Dhabi, United Arab Emirates (2022)","DOI":"10.18653\/v1\/2022.findings-emnlp.288"},{"key":"14_CR41","unstructured":"Sukhbaatar, S., Bruna, J., Paluri, M., Bourdev, L., Fergus, R.: Training convolutional networks with noisy labels. arXiv preprint arXiv:1406.2080 (2014)"},{"key":"14_CR42","unstructured":"Tan, M., Le, Q.: Efficientnet: rethinking model scaling for convolutional neural networks. In: International Conference on Machine Learning. PMLR (2019)"},{"key":"14_CR43","unstructured":"Tratz, S., Hovy, E.: A taxonomy, dataset, and classifier for automatic noun compound interpretation. In: Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics, pp. 678\u2013687 (Jul 2010)"},{"key":"14_CR44","doi-asserted-by":"publisher","first-page":"107964","DOI":"10.1109\/ACCESS.2019.2932769","volume":"7","author":"H Wang","year":"2019","unstructured":"Wang, H., Bah, M.J., Hammad, M.: Progress in outlier detection techniques: a survey. IEEE Access 7, 107964\u2013108000 (2019)","journal-title":"IEEE Access"},{"key":"14_CR45","doi-asserted-by":"crossref","unstructured":"Wang, Y., Ma, X., Chen, Z., Luo, Y., Yi, J., Bailey, J.: Symmetric cross entropy for robust learning with noisy labels. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 322\u2013330 (2019)","DOI":"10.1109\/ICCV.2019.00041"},{"key":"14_CR46","doi-asserted-by":"crossref","unstructured":"Wang, Z., Shang, J., Liu, L., Lu, L., Liu, J., Han, J.: Crossweigh: training named entity tagger from imperfect annotations. In: Inui, K., Jiang, J., Ng, V., Wan, X. (eds.) Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing, EMNLP-IJCNLP 2019 (2019)","DOI":"10.18653\/v1\/D19-1519"},{"key":"14_CR47","unstructured":"Wei, J.: Label noise reduction without assumptions. Dartmouth College Undergraduate Theses, vol. 164 (2020)"},{"key":"14_CR48","unstructured":"Zhang, J., et al.: WRENCH: a comprehensive benchmark for weak supervision. In: Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (2021)"},{"key":"14_CR49","unstructured":"Zhao, B., Mopuri, K.R., Bilen, H.: Dataset condensation with gradient matching. In: International Conference on Learning Representations (2021)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43412-9_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T20:31:01Z","timestamp":1694896261000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43412-9_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434112","9783031434129"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43412-9_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Our method can improve the model predictions and produce more useful results, but we cannot promise they are perfect, especially for life-critical domains like healthcare. Data used for training can have biases that machine learning methods may pick up, and one needs to be careful when using such models in actual applications. We relied on datasets that were already published and did not hire anyone to annotate them for our work.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}