{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:05:17Z","timestamp":1750219517407,"version":"3.41.0"},"publisher-location":"Singapore","reference-count":22,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819681723","type":"print"},{"value":"9789819681730","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-8173-0_8","type":"book-chapter","created":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T11:41:51Z","timestamp":1750160511000},"page":"93-104","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Improving Generalization in\u00a0Deep Neural Networks by\u00a0Mitigating Memorization"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4456-4556","authenticated-orcid":false,"given":"Yong","family":"Zhuang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1601-1401","authenticated-orcid":false,"given":"Tianyu","family":"Kang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1892-5028","authenticated-orcid":false,"given":"Zhen","family":"Lu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3383-551X","authenticated-orcid":false,"given":"Wei","family":"Ding","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3789-7686","authenticated-orcid":false,"given":"Ping","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,18]]},"reference":[{"key":"8_CR1","unstructured":"Arpit, D., et\u00a0al.: A closer look at memorization in deep networks. In: International Conference on Machine Learning, pp. 233\u2013242. PMLR (2017)"},{"key":"8_CR2","first-page":"34912","volume":"35","author":"C Cianfarani","year":"2022","unstructured":"Cianfarani, C., Bhagoji, A.N., Sehwag, V., Zhao, B., Zheng, H., Mittal, P.: Understanding robust learning through the lens of representation similarities. Adv. Neural. Inf. Process. Syst. 35, 34912\u201334925 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"8_CR3","first-page":"24226","volume":"35","author":"Y Ding","year":"2022","unstructured":"Ding, Y., Wang, L., Liang, B., Liang, S., Wang, Y., Chen, F.: Domain generalization by learning and removing domain-specific features. Adv. Neural. Inf. Process. Syst. 35, 24226\u201324239 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"8_CR4","unstructured":"Dosovitskiy, A.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"8_CR5","unstructured":"Foret, P., Kleiner, A., Mobahi, H., Neyshabur, B.: Sharpness-aware minimization for efficiently improving generalization. arXiv preprint arXiv:2010.01412 (2020)"},{"key":"8_CR6","unstructured":"Hanin, B., Rolnick, D.: Deep ReLU networks have surprisingly few activation patterns. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Hein, M., Andriushchenko, M., Bitterwolf, J.: Why ReLU networks yield high-confidence predictions far away from the training data and how to mitigate the problem. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 41\u201350 (2019)","DOI":"10.1109\/CVPR.2019.00013"},{"key":"8_CR8","unstructured":"Ji, X., Pascanu, R., Hjelm, R.D., Vedaldi, A., Lakshminarayanan, B., Bengio, Y.: Predicting unreliable predictions by shattering a neural network (2021)"},{"key":"8_CR9","unstructured":"Jiang, Y., Neyshabur, B., Mobahi, H., Krishnan, D., Bengio, S.: Fantastic generalization measures and where to find them. arXiv preprint arXiv:1912.02178 (2019)"},{"key":"8_CR10","first-page":"16577","volume":"35","author":"J Kaddour","year":"2022","unstructured":"Kaddour, J., Liu, L., Silva, R., Kusner, M.J.: When do flat minima optimizers work? Adv. Neural. Inf. Process. Syst. 35, 16577\u201316595 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"8_CR11","unstructured":"Leavitt, M.L., Morcos, A.: Linking average-and worst-case perturbation robustness via class selectivity and dimensionality. arXiv preprint arXiv:2010.07693 (2020)"},{"key":"8_CR12","unstructured":"Leavitt, M.L., Morcos, A.: Selectivity considered harmful: evaluating the causal impact of class selectivity in DNNs. arXiv preprint arXiv:2003.01262 (2020)"},{"key":"8_CR13","unstructured":"Li, X.C., Li, L., Zhan, D.C.: Visualizing, rethinking, and mining the loss landscape of deep neural networks. arXiv preprint arXiv:2405.12493 (2024)"},{"key":"8_CR14","unstructured":"Liang, T., Poggio, T., Rakhlin, A., Stokes, J.: Fisher-Rao metric, geometry, and complexity of neural networks. In: The 22nd International Conference on Artificial Intelligence and Statistics, pp. 888\u2013896. PMLR (2019)"},{"key":"8_CR15","first-page":"19693","volume":"33","author":"H Maennel","year":"2020","unstructured":"Maennel, H., et al.: What do neural networks learn when trained with random labels? Adv. Neural. Inf. Process. Syst. 33, 19693\u201319704 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"8_CR16","unstructured":"Nagarajan, V., Kolter, J.Z.: Generalization in deep networks: The role of distance from initialization. arXiv preprint arXiv:1901.01672 (2019)"},{"key":"8_CR17","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"8_CR18","unstructured":"Wah, C., Branson, S., Welinder, P., Perona, P., Belongie, S.: The Caltech-UCSD Birds-200-2011 dataset (2011)"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Yang, Y., et al.: Test accuracy vs. Generalization gap: model selection in NLP without accessing training or testing data. In: Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 3011\u20133021 (2023)","DOI":"10.1145\/3580305.3599518"},{"key":"8_CR20","doi-asserted-by":"crossref","unstructured":"Zagoruyko, S., Komodakis, N.: Wide residual networks. arXiv preprint arXiv:1605.07146 (2016)","DOI":"10.5244\/C.30.87"},{"key":"8_CR21","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: When neural networks fail to generalize? A model sensitivity perspective. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a037, pp. 11219\u201311227 (2023)","DOI":"10.1609\/aaai.v37i9.26328"},{"key":"8_CR22","unstructured":"Zhang, S., Reid, I., P\u00e9rez, G., Louis, A.: Why flatness correlates with generalization for deep neural networks. arXiv preprint arXiv:2103.06219 (2021)"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-8173-0_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T11:41:58Z","timestamp":1750160518000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-8173-0_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819681723","9789819681730"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-8173-0_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"18 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"PAKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sydney, NSW","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pakdd2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/pakdd2025.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}