{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T12:50:00Z","timestamp":1761396600358,"version":"3.40.3"},"publisher-location":"Cham","reference-count":45,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031408366"},{"type":"electronic","value":"9783031408373"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-40837-3_13","type":"book-chapter","created":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T23:02:25Z","timestamp":1692658945000},"page":"200-226","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["The Split Matters: Flat Minima Methods for\u00a0Improving the\u00a0Performance of\u00a0GNNs"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6079-6480","authenticated-orcid":false,"given":"Nicolas","family":"Lell","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2653-9245","authenticated-orcid":false,"given":"Ansgar","family":"Scherp","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,22]]},"reference":[{"key":"13_CR1","doi-asserted-by":"publisher","unstructured":"Bahri, D., Mobahi, H., Tay, Y.: Sharpness-aware minimization improves language model generalization. In: ACL 2022. ACL (2022). https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.508","DOI":"10.18653\/v1\/2022.acl-long.508"},{"key":"13_CR2","unstructured":"Brock, A., De, S., Smith, S.L., Simonyan, K.: High-performance large-scale image recognition without normalization. In: ICML 2021. PMLR (2021)"},{"key":"13_CR3","unstructured":"Chen, J., Zhu, J., Song, L.: Stochastic training of graph convolutional networks with variance reduction. In: ICML 2018. PMLR (2018)"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Chen, M., Wei, Z., Huang, Z., Ding, B., Li, Y.: Simple and deep graph convolutional networks. In: ICML 2020. PMLR (2020)","DOI":"10.1145\/3340531.3412139"},{"key":"13_CR5","unstructured":"Chen, X., Hsieh, C., Gong, B.: When vision transformers outperform ResNets without pre-training or strong data augmentations. In: ICLR 2022. OpenReview.net (2022)"},{"key":"13_CR6","unstructured":"Damian, A., Ma, T., Lee, J.D.: Label noise SGD provably prefers flat global minimizers. In: NeurIPS 2021 (2021)"},{"key":"13_CR7","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019. ACL (2019). https:\/\/doi.org\/10.18653\/v1\/n19-1423","DOI":"10.18653\/v1\/n19-1423"},{"key":"13_CR8","unstructured":"Du, J., Zhou, D., Feng, J., Tan, V., Zhou, J.T.: Sharpness-aware training for free. In: NeurIPS (2022)"},{"key":"13_CR9","unstructured":"Foret, P., Kleiner, A., Mobahi, H., Neyshabur, B.: Sharpness-aware minimization for efficiently improving generalization. In: ICLR 2021. OpenReview.net (2021)"},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"Guo, H., Jin, J., Liu, B.: Stochastic weight averaging revisited. CoRR (2022)","DOI":"10.3390\/app13052935"},{"key":"13_CR11","unstructured":"Gupta, V., Serrano, S.A., DeCoste, D.: Stochastic weight averaging in parallel: large-batch training that generalizes well. In: ICLR 2020. OpenReview.net (2020)"},{"key":"13_CR12","doi-asserted-by":"publisher","unstructured":"Hamilton, W.L.: Graph Representation Learning. Morgan & Claypool Publishers (2020). https:\/\/doi.org\/10.2200\/S01045ED1V01Y202009AIM046","DOI":"10.2200\/S01045ED1V01Y202009AIM046"},{"key":"13_CR13","unstructured":"Hamilton, W.L., Ying, Z., Leskovec, J.: Inductive representation learning on large graphs. In: NeurIPS 2017 (2017)"},{"key":"13_CR14","unstructured":"Hochreiter, S., Schmidhuber, J.: Simplifying neural nets by discovering flat minima. In: NeurIPS 1994. MIT Press (1994)"},{"key":"13_CR15","doi-asserted-by":"publisher","unstructured":"Hochreiter, S., Schmidhuber, J.: Flat minima. Neural Comput. (1997). https:\/\/doi.org\/10.1162\/neco.1997.9.1.1","DOI":"10.1162\/neco.1997.9.1.1"},{"key":"13_CR16","unstructured":"Hu, W., et al.: Open graph benchmark: Datasets for machine learning on graphs. In: NeurIPS 2020 (2020)"},{"key":"13_CR17","unstructured":"Hu, Y., You, H., Wang, Z., Wang, Z., Zhou, E., Gao, Y.: Graph-MLP: node classification without message passing in graph. CoRR (2021)"},{"key":"13_CR18","unstructured":"Huang, G., Li, Y., Pleiss, G., Liu, Z., Hopcroft, J.E., Weinberger, K.Q.: Snapshot ensembles: train 1, get M for free. In: ICLR 2017. OpenReview.net (2017)"},{"key":"13_CR19","unstructured":"Izmailov, P., Podoprikhin, D., Garipov, T., Vetrov, D.P., Wilson, A.G.: Averaging weights leads to wider optima and better generalization. In: Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial Intelligence, UAI 2018. AUAI Press (2018)"},{"key":"13_CR20","doi-asserted-by":"publisher","DOI":"10.1145\/3418526","author":"C Jin","year":"2021","unstructured":"Jin, C., Netrapalli, P., Ge, R., Kakade, S.M., Jordan, M.I.: On nonconvex optimization for machine learning: gradients, stochasticity, and saddle points. J. ACM (2021). https:\/\/doi.org\/10.1145\/3418526","journal-title":"J. ACM"},{"key":"13_CR21","unstructured":"Kaddour, J., Liu, L., Silva, R., Kusner, M.: When do flat minima optimizers work? In: NeurIPS (2022). https:\/\/openreview.net\/forum?id=vDeh2yxTvuh"},{"key":"13_CR22","unstructured":"Kim, M., Li, D., Hu, S.X., Hospedales, T.M.: Fisher SAM: information geometry and sharpness aware minimisation. In: ICML 2022. PMLR (2022)"},{"key":"13_CR23","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. In: ICLR 2017. OpenReview.net (2017)"},{"key":"13_CR24","unstructured":"Kwon, J., Kim, J., Park, H., Choi, I.K.: ASAM: adaptive sharpness-aware minimization for scale-invariant learning of deep neural networks. In: ICML 2021. PMLR (2021)"},{"key":"13_CR25","unstructured":"Li, H., Xu, Z., Taylor, G., Studer, C., Goldstein, T.: Visualizing the loss landscape of neural nets. In: NeurIPS 2018 (2018)"},{"key":"13_CR26","unstructured":"Liu, J., Cai, J., Zhuang, B.: Sharpness-aware quantization for deep neural networks. CoRR (2021)"},{"key":"13_CR27","doi-asserted-by":"publisher","unstructured":"Liu, Y., Mai, S., Chen, X., Hsieh, C., You, Y.: Towards efficient and scalable sharpness-aware minimization. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, 18\u201324 June 2022. IEEE (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01204","DOI":"10.1109\/CVPR52688.2022.01204"},{"key":"13_CR28","doi-asserted-by":"publisher","unstructured":"Na, C., Mehta, S.V., Strubell, E.: Train flat, then compress: sharpness-aware minimization learns more compressible models. CoRR (2022). https:\/\/doi.org\/10.48550\/arXiv.2205.12694","DOI":"10.48550\/arXiv.2205.12694"},{"key":"13_CR29","unstructured":"Namata, G., London, B., Getoor, L., Huang, B., Edu, U.: Query-driven active surveying for collective classification. In: 10th International Workshop on Mining and Learning with Graphs (2012)"},{"key":"13_CR30","unstructured":"Orvieto, A., Kersting, H., Proske, F., Bach, F.R., Lucchi, A.: Anticorrelated noise injection for improved generalization. In: ICML 2022. PMLR (2022)"},{"key":"13_CR31","doi-asserted-by":"publisher","DOI":"10.1609\/aimag.v29i3.2157","author":"P Sen","year":"2008","unstructured":"Sen, P., Namata, G., Bilgic, M., Getoor, L., Gallagher, B., Eliassi-Rad, T.: Collective classification in network data. AI Mag. (2008). https:\/\/doi.org\/10.1609\/aimag.v29i3.2157","journal-title":"AI Mag."},{"key":"13_CR32","unstructured":"Shchur, O., Mumme, M., Bojchevski, A., G\u00fcnnemann, S.: Pitfalls of graph neural network evaluation. CoRR (2018)"},{"key":"13_CR33","unstructured":"Velickovic, P., Cucurull, G., Casanova, A., Romero, A., Li\u00f2, P., Bengio, Y.: Graph attention networks. In: ICLR 2018. OpenReview.net (2018)"},{"key":"13_CR34","unstructured":"Wortsman, M., et al.: Model soups: averaging weights of multiple fine-tuned models improves accuracy without increasing inference time. In: ICML 2022. PMLR (2022)"},{"key":"13_CR35","unstructured":"Wu, F., Jr., Souza, A.H., Zhang, T., Fifty, C., Yu, T., Weinberger, K.Q.: Simplifying graph convolutional networks. In: ICML 2019. PMLR (2019)"},{"key":"13_CR36","unstructured":"Xu, K., Li, C., Tian, Y., Sonobe, T., Kawarabayashi, K., Jegelka, S.: Representation learning on graphs with jumping knowledge networks. In: ICML 2018. PMLR (2018)"},{"key":"13_CR37","unstructured":"Yang, G., Zhang, T., Kirichenko, P., Bai, J., Wilson, A.G., Sa, C.D.: SWALP: stochastic weight averaging in low precision training. In: ICML 2019. PMLR (2019)"},{"key":"13_CR38","unstructured":"Yang, Z., Cohen, W.W., Salakhutdinov, R.: Revisiting semi-supervised learning with graph embeddings. In: ICML 2016. JMLR.org (2016)"},{"key":"13_CR39","unstructured":"Zeng, H., Zhou, H., Srivastava, A., Kannan, R., Prasanna, V.: GraphSAINT: graph sampling based inductive learning method. In: ICLR 2020 (2020). https:\/\/openreview.net\/forum?id=BJe8pkHFwS"},{"key":"13_CR40","unstructured":"Zhao, Y., Zhang, H., Hu, X.: Penalizing gradient norm for efficiently improving generalization in deep learning. In: ICML 2022. PMLR (2022)"},{"key":"13_CR41","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2021.01.001","author":"J Zhou","year":"2020","unstructured":"Zhou, J., et al.: Graph neural networks: a review of methods and applications. AI Open (2020). https:\/\/doi.org\/10.1016\/j.aiopen.2021.01.001","journal-title":"AI Open"},{"key":"13_CR42","unstructured":"Zhou, M., Liu, T., Li, Y., Lin, D., Zhou, E., Zhao, T.: Toward understanding the importance of noise in training neural networks. In: ICML 2019. PMLR (2019)"},{"key":"13_CR43","doi-asserted-by":"publisher","DOI":"10.1201\/b12207","volume-title":"Ensemble Methods: Foundations and Algorithms","author":"ZH Zhou","year":"2012","unstructured":"Zhou, Z.H.: Ensemble Methods: Foundations and Algorithms. CRC Press, Boca Raton (2012)"},{"key":"13_CR44","unstructured":"Zhuang, J., et al.: Surrogate gap minimization improves sharpness-aware training. In: ICLR 2022. OpenReview.net (2022)"},{"key":"13_CR45","doi-asserted-by":"publisher","unstructured":"Zitnik, M., Leskovec, J.: Predicting multicellular function through multi-layer tissue networks. Bioinformatics (2017). https:\/\/doi.org\/10.1093\/bioinformatics\/btx252","DOI":"10.1093\/bioinformatics\/btx252"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Extraction"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-40837-3_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T11:32:13Z","timestamp":1710329533000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-40837-3_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031408366","9783031408373"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-40837-3_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CD-MAKE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Cross-Domain Conference for Machine Learning and Knowledge Extraction","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Benevento","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cd-make2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/cd-make.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"30","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"18","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"60% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}