{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T10:06:07Z","timestamp":1780653967828,"version":"3.54.1"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032091918","type":"print"},{"value":"9783032091925","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-09192-5_18","type":"book-chapter","created":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T23:25:15Z","timestamp":1767309915000},"page":"267-283","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["CGD: Modifying the\u00a0Loss Landscape by\u00a0Gradient Regularization"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0386-6534","authenticated-orcid":false,"given":"Shikhar","family":"Saxena","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9313-5662","authenticated-orcid":false,"given":"Tejas","family":"Bodas","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5819-3798","authenticated-orcid":false,"given":"Arti","family":"Yardi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"18_CR1","unstructured":"Barrett, D.G.T., Dherin, B.: Implicit gradient regularization (2022). https:\/\/arxiv.org\/abs\/2009.11162"},{"key":"18_CR2","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611973655","author":"A Beck","year":"2014","unstructured":"Beck, A.: Introduction to nonlinear optimization: theory, algorithms, and applications with MATLAB. Soc. Ind. Appl. Math. (2014). https:\/\/doi.org\/10.1137\/1.9781611973655","journal-title":"Soc. Ind. Appl. Math."},{"issue":"99","key":"18_CR3","doi-asserted-by":"publisher","first-page":"368","DOI":"10.1090\/S0025-5718-1967-0224273-2","volume":"21","author":"CG Broyden","year":"1967","unstructured":"Broyden, C.G.: Quasi-newton methods and their application to function minimisation. Math. Comput. 21(99), 368\u2013381 (1967). https:\/\/doi.org\/10.1090\/S0025-5718-1967-0224273-2","journal-title":"Math. Comput."},{"issue":"12","key":"18_CR4","doi-asserted-by":"publisher","DOI":"10.1088\/1742-5468\/ab39d9","volume":"2019","author":"P Chaudhari","year":"2019","unstructured":"Chaudhari, P., et al.: Entropy-SGD: biasing gradient descent into wide valleys. J. Stat. Mech: Theory Exp. 2019(12), 124018 (2019). https:\/\/doi.org\/10.1088\/1742-5468\/ab39d9","journal-title":"J. Stat. Mech: Theory Exp."},{"key":"18_CR5","doi-asserted-by":"crossref","unstructured":"Du, J., Zhou, D., Feng, J., Tan, V., Zhou, J.T.: Sharpness-aware training for free. In: Advances in Neural Information Processing Systems, pp. 23439\u201323451 (2022). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/948b1c9d660d7286dd767cd07dabd487-Paper-Conference.pdf","DOI":"10.52202\/068431-1703"},{"issue":"3","key":"18_CR6","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1093\/comjnl\/13.3.317","volume":"13","author":"R Fletcher","year":"1970","unstructured":"Fletcher, R.: A new approach to variable metric algorithms. Comput. J. 13(3), 317\u2013322 (1970). https:\/\/doi.org\/10.1093\/comjnl\/13.3.317","journal-title":"Comput. J."},{"key":"18_CR7","unstructured":"Foret, P., Kleiner, A., Mobahi, H., Neyshabur, B.: Sharpness-aware minimization for efficiently improving generalization (2021). https:\/\/arxiv.org\/abs\/2010.01412"},{"issue":"1","key":"18_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1162\/neco.1997.9.1.1","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Flat minima. Neural Comput. 9(1), 1\u201342 (1997). https:\/\/doi.org\/10.1162\/neco.1997.9.1.1","journal-title":"Neural Comput."},{"key":"18_CR9","unstructured":"Karakida, R., Takase, T., Hayase, T., Osawa, K.: Understanding gradient regularization in deep learning: efficient finite-difference computation and implicit bias. In: Proceedings of the 40th International Conference on Machine Learning, pp. 15809\u201315827 (2023). https:\/\/proceedings.mlr.press\/v202\/karakida23a.html"},{"key":"18_CR10","doi-asserted-by":"publisher","unstructured":"Karimi, H., Nutini, J., Schmidt, M.: Linear convergence of gradient and proximal-gradient methods under the polyak-\u0142ojasiewicz condition. In: Machine Learning and Knowledge Discovery in Databases, pp. 795\u2013811 (2016). https:\/\/doi.org\/10.1007\/978-3-319-46128-1_50","DOI":"10.1007\/978-3-319-46128-1_50"},{"key":"18_CR11","unstructured":"Keskar, N.S., Mudigere, D., Nocedal, J., Smelyanskiy, M., Tang, P.T.P.: On large-batch training for deep learning: generalization gap and sharp minima (2017). https:\/\/arxiv.org\/abs\/1609.04836"},{"key":"18_CR12","unstructured":"Li, H., Xu, Z., Taylor, G., Studer, C., Goldstein, T.: Visualizing the loss landscape of neural nets. In: Advances in Neural Information Processing Systems (2018). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2018\/file\/a41b3bb3e6b050b6c9067c67f663b915-Paper.pdf"},{"key":"18_CR13","doi-asserted-by":"publisher","unstructured":"Nocedal, J., Wright, S.J.: Numerical Optimization, 2nd edn. Springer, New York (2006). https:\/\/doi.org\/10.1007\/978-0-387-40065-5","DOI":"10.1007\/978-0-387-40065-5"},{"issue":"1","key":"18_CR14","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1162\/neco.1994.6.1.147","volume":"6","author":"BA Pearlmutter","year":"1994","unstructured":"Pearlmutter, B.A.: Fast exact multiplication by the hessian. Neural Comput. 6(1), 147\u2013160 (1994). https:\/\/doi.org\/10.1162\/neco.1994.6.1.147","journal-title":"Neural Comput."},{"issue":"4","key":"18_CR15","doi-asserted-by":"publisher","first-page":"864","DOI":"10.1016\/0041-5553(63)90382-3","volume":"3","author":"B Polyak","year":"1963","unstructured":"Polyak, B.: Gradient methods for the minimisation of functionals. USSR Comput. Math. Math. Phys. 3(4), 864\u2013878 (1963). https:\/\/doi.org\/10.1016\/0041-5553(63)90382-3","journal-title":"USSR Comput. Math. Math. Phys."},{"key":"18_CR16","doi-asserted-by":"publisher","unstructured":"Prakash, U., Chollera, A., Khatwani, K., K.\u00a0J., P., Bodas, T.: Practical first-order bayesian optimization algorithms. In: Proceedings of the 7th Joint International Conference on Data Science & Management of Data (11th ACM IKDD CODS and 29th COMAD), pp. 173\u2013181 (2024). https:\/\/doi.org\/10.1145\/3632410.3632418","DOI":"10.1145\/3632410.3632418"},{"key":"18_CR17","unstructured":"Ruder, S.: An overview of gradient descent optimization algorithms (2017). https:\/\/arxiv.org\/abs\/1609.04747"},{"key":"18_CR18","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: Proceedings of the 32nd International Conference on Machine Learning, pp. 1889\u20131897 (2015). https:\/\/proceedings.mlr.press\/v37\/schulman15.html"},{"key":"18_CR19","unstructured":"Smith, S.L., Dherin, B., Barrett, D.G.T., De, S.: On the origin of implicit regularization in stochastic gradient descent (2021). https:\/\/arxiv.org\/abs\/2101.12176"},{"key":"18_CR20","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in Neural Information Processing Systems (1999). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/1999\/file\/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf"},{"issue":"3\u20134","key":"18_CR21","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/bf00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8(3\u20134), 229\u2013256 (1992). https:\/\/doi.org\/10.1007\/bf00992696","journal-title":"Mach. Learn."},{"key":"18_CR22","unstructured":"Zhao, Y., Zhang, H., Hu, X.: Penalizing gradient norm for efficiently improving generalization in deep learning. In: Proceedings of the 39th International Conference on Machine Learning, pp. 26982\u201326992 (2022). https:\/\/proceedings.mlr.press\/v162\/zhao22i.html"},{"key":"18_CR23","unstructured":"Zhuang, J., et al.: Surrogate gap minimization improves sharpness-aware training (2022). https:\/\/arxiv.org\/abs\/2203.08065"}],"container-title":["Lecture Notes in Computer Science","Learning and Intelligent Optimization"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-09192-5_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T09:28:28Z","timestamp":1780651708000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-09192-5_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032091918","9783032091925"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-09192-5_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"LION","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Learning and Intelligent Optimization","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Prague","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"lion2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/lion19.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}