{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T03:07:25Z","timestamp":1778728045889,"version":"3.51.4"},"reference-count":57,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T00:00:00Z","timestamp":1776816000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Artificial Intelligence"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.artint.2026.104544","type":"journal-article","created":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T06:47:29Z","timestamp":1776926849000},"page":"104544","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Learning gradient-based mixup with extrapolation toward flatter minima for domain generalization"],"prefix":"10.1016","volume":"356","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0884-9597","authenticated-orcid":false,"given":"Danni","family":"Peng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6565-3836","authenticated-orcid":false,"given":"Sinno Jialin","family":"Pan","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.artint.2026.104544_bib0001","first-page":"2178","article-title":"Generalizing from several related classification tasks to a new unlabeled sample","volume":"24","author":"Blanchard","year":"2011","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2026.104544_bib0002","series-title":"International Conference on Machine Learning","first-page":"10","article-title":"Domain generalization via invariant feature representation","author":"Muandet","year":"2013"},{"key":"10.1016\/j.artint.2026.104544_bib0003","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"5400","article-title":"Domain generalization with adversarial feature learning","author":"Li","year":"2018"},{"key":"10.1016\/j.artint.2026.104544_bib0004","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"5542","article-title":"Deeper, broader and artier domain generalization","author":"Li","year":"2017"},{"key":"10.1016\/j.artint.2026.104544_bib0005","series-title":"Thirty-Second AAAI Conference on Artificial Intelligence","article-title":"Learning to generalize: meta-learning for domain generalization","author":"Li","year":"2018"},{"key":"10.1016\/j.artint.2026.104544_bib0006","series-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","first-page":"1006","article-title":"MetaReg: towards domain generalization using meta-regularization","author":"Balaji","year":"2018"},{"key":"10.1016\/j.artint.2026.104544_bib0007","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"1446","article-title":"Episodic training for domain generalization","author":"Li","year":"2019"},{"key":"10.1016\/j.artint.2026.104544_bib0008","series-title":"International Conference on Learning Representations","article-title":"Mixup: beyond empirical risk minimization","author":"Zhang","year":"2018"},{"key":"10.1016\/j.artint.2026.104544_bib0009","series-title":"International Conference on Machine Learning","first-page":"6438","article-title":"Manifold mixup: better representations by interpolating hidden states","author":"Verma","year":"2019"},{"key":"10.1016\/j.artint.2026.104544_bib0010","doi-asserted-by":"crossref","first-page":"3050","DOI":"10.1109\/TNNLS.2020.3049011","article-title":"MetaMixUp: learning adaptive interpolation policy of MixUp with metalearning","volume":"33","author":"Mai","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.artint.2026.104544_bib0011","series-title":"International Conference on Learning Representations","article-title":"Domain generalization with mixstyle","author":"Zhou","year":"2020"},{"key":"10.1016\/j.artint.2026.104544_bib0012","series-title":"ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"3622","article-title":"Heterogeneous domain generalization via domain mixup","author":"Wang","year":"2020"},{"key":"10.1016\/j.artint.2026.104544_bib0013","series-title":"International Conference on Learning Representations","article-title":"Learning explanations that are hard to vary","author":"Parascandolo","year":"2020"},{"key":"10.1016\/j.artint.2026.104544_bib0014","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"6630","article-title":"Domain generalization via gradient surgery","author":"Mansilla","year":"2021"},{"key":"10.1016\/j.artint.2026.104544_bib0015","unstructured":"Y. Shi, J. Seely, P.H.S. Torr, N. Siddharth, A. Hannun, N. Usunier, G. Synnaeve, Gradient Matching for Domain Generalization, (2021). arXiv: 2104.09937."},{"key":"10.1016\/j.artint.2026.104544_bib0016","unstructured":"A.E. Eshratifar, D. Eigen, M. Pedram, Gradient agreement as an optimization objective for meta-learning, (2018). arXiv: 1810.08178."},{"key":"10.1016\/j.artint.2026.104544_bib0017","unstructured":"Y. Du, W.M. Czarnecki, S.M. Jayakumar, M. Farajtabar, R. Pascanu, B. Lakshminarayanan, Adapting auxiliary losses using gradient similarity, (2018). arXiv: 1812.02224."},{"issue":"1","key":"10.1016\/j.artint.2026.104544_bib0018","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1162\/neco.1997.9.1.1","article-title":"Flat minima","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Comput."},{"key":"10.1016\/j.artint.2026.104544_bib0019","series-title":"5th International Conference on Learning Representations, ICLR 2017","article-title":"Entropy-SGD: biasing gradient descent into wide valleys (international conference on learning representations, ICLR 2017)","author":"Chaudhari","year":"2019"},{"key":"10.1016\/j.artint.2026.104544_bib0020","series-title":"International Conference on Learning Representations","article-title":"Sharpness-aware minimization for efficiently improving generalization","author":"Foret","year":"2020"},{"key":"10.1016\/j.artint.2026.104544_bib0021","series-title":"34th Conference on Uncertainty in Artificial Intelligence 2018, UAI 2018","first-page":"876","article-title":"Averaging weights leads to wider optima and better generalization","author":"Izmailov","year":"2018"},{"key":"10.1016\/j.artint.2026.104544_bib0022","unstructured":"H. Guo, J. Jin, B. Liu, Stochastic Weight Averaging Revisited, (2022). arXiv: 2201.00519."},{"key":"10.1016\/j.artint.2026.104544_bib0023","series-title":"NIPS","article-title":"Vicinal risk minimization","author":"Chapelle","year":"2000"},{"key":"10.1016\/j.artint.2026.104544_bib0024","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"6023","article-title":"CutMix: regularization strategy to train strong classifiers with localizable features","author":"Yun","year":"2019"},{"key":"10.1016\/j.artint.2026.104544_bib0025","series-title":"European Conference on Computer Vision","first-page":"95","article-title":"Remix: rebalanced mixup","author":"Chou","year":"2020"},{"key":"10.1016\/j.artint.2026.104544_bib0026","unstructured":"G. Giannone, S. Havrylov, J. Massiah, E. Yilmaz, Y. Jiao, Just Mix Once: Worst-group Generalization by Group Interpolation, (2022). arXiv: 2210.12195."},{"key":"10.1016\/j.artint.2026.104544_bib0027","doi-asserted-by":"crossref","first-page":"14345","DOI":"10.52202\/068431-1043","article-title":"Selecmix: debiased learning by contradicting-pair sampling","volume":"35","author":"Hwang","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2026.104544_bib0028","series-title":"International Conference on Machine Learning","first-page":"25407","article-title":"Improving out-of-distribution robustness via selective augmentation","author":"Yao","year":"2022"},{"key":"10.1016\/j.artint.2026.104544_bib0029","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"3714","article-title":"Mixup as locally linear out-of-manifold regularization","volume":"33","author":"Guo","year":"2019"},{"key":"10.1016\/j.artint.2026.104544_bib0030","series-title":"International Conference on Learning Representations","article-title":"Learning to learn without forgetting by maximizing transfer and minimizing interference","author":"Riemer","year":"2018"},{"key":"10.1016\/j.artint.2026.104544_bib0031","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume":"33","author":"Yu","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2026.104544_bib0032","unstructured":"S. Shahtalebi, J.-C. Gagnon-Audet, T. Laleh, M. Faramarzi, K. Ahuja, I. Rish, Sand-mask: An enhanced gradient masking strategy for the discovery of invariances in domain generalization, (2021). arXiv: 2106.02266."},{"key":"10.1016\/j.artint.2026.104544_bib0033","unstructured":"A. Rame, C. Dancette, M. Cord, Fishr: Invariant gradient variances for out-of-distribution generalization, (2021). arXiv: 2109.02934."},{"key":"10.1016\/j.artint.2026.104544_bib0034","series-title":"5th International Conference on Learning Representations, ICLR 2017","article-title":"On large-batch training for deep learning: generalization gap and sharp minima","author":"Keskar","year":"2017"},{"issue":"3","key":"10.1016\/j.artint.2026.104544_bib0035","doi-asserted-by":"crossref","first-page":"448","DOI":"10.1162\/neco.1992.4.3.448","article-title":"A practical Bayesian framework for backpropagation networks","volume":"4","author":"MacKay","year":"1992","journal-title":"Neural Comput."},{"key":"10.1016\/j.artint.2026.104544_bib0036","series-title":"Advances in Neural Information Processing Systems","article-title":"SWAD: Domain generalization by seeking flat minima","author":"Cha","year":"2021"},{"key":"10.1016\/j.artint.2026.104544_bib0037","unstructured":"D. Arpit, H. Wang, Y. Zhou, C. Xiong, Ensemble of averages: Improving model selection and boosting performance in domain generalization, (2021). arXiv: 2110.10832."},{"key":"10.1016\/j.artint.2026.104544_bib0038","series-title":"International Conference on Learning Representations","article-title":"Understanding and improving interpolation in autoencoders via an adversarial regularizer","author":"Berthelot","year":"2018"},{"key":"10.1016\/j.artint.2026.104544_bib0039","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1199","article-title":"Learning to compare: relation network for few-shot learning","author":"Sung","year":"2018"},{"key":"10.1016\/j.artint.2026.104544_bib0040","series-title":"Proceedings of the 26th International Conference on World Wide Web","first-page":"173","article-title":"Neural collaborative filtering","author":"He","year":"2017"},{"key":"10.1016\/j.artint.2026.104544_bib0041","series-title":"Advances in Neural Information Processing Systems","article-title":"Relative flatness and generalization","author":"Petzka","year":"2021"},{"key":"10.1016\/j.artint.2026.104544_bib0042","first-page":"2672","article-title":"Generative adversarial nets","volume":"27","author":"Goodfellow","year":"2014","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2026.104544_bib0043","unstructured":"A. Makhzani, J. Shlens, N. Jaitly, I. Goodfellow, B. Frey, Adversarial autoencoders, (2015). arXiv: 1511.05644."},{"key":"10.1016\/j.artint.2026.104544_bib0044","unstructured":"I. Gulrajani, D. Lopez-Paz, In search of lost domain generalization, (2020). arXiv: 2007.01434."},{"key":"10.1016\/j.artint.2026.104544_bib0045","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"1657","article-title":"Unbiased metric learning: on the utilization of multiple datasets and web images for softening bias","author":"Fang","year":"2013"},{"key":"10.1016\/j.artint.2026.104544_bib0046","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"5018","article-title":"Deep hashing network for unsupervised domain adaptation","author":"Venkateswara","year":"2017"},{"key":"10.1016\/j.artint.2026.104544_bib0047","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"456","article-title":"Recognition in terra incognita","author":"Beery","year":"2018"},{"key":"10.1016\/j.artint.2026.104544_bib0048","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"1406","article-title":"Moment matching for multi-source domain adaptation","author":"Peng","year":"2019"},{"key":"10.1016\/j.artint.2026.104544_bib0049","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.artint.2026.104544_bib0050","unstructured":"P. Teterwak, K. Saito, T. Tsiligkaridis, K. Saenko, B.A. Plummer, ERM++: An Improved Baseline for Domain Generalization, (2023). arXiv: 2304.01973."},{"key":"10.1016\/j.artint.2026.104544_bib0051","series-title":"Statistical Learning Theory","author":"Vapnick","year":"1998"},{"key":"10.1016\/j.artint.2026.104544_bib0052","series-title":"European Conference on Computer Vision","first-page":"561","article-title":"Learning to generate novel domains for domain generalization","author":"Zhou","year":"2020"},{"key":"10.1016\/j.artint.2026.104544_bib0053","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"52","article-title":"Crossnorm and selfnorm for generalization under distribution shifts","author":"Tang","year":"2021"},{"key":"10.1016\/j.artint.2026.104544_bib0054","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"8024","article-title":"Towards principled disentanglement for domain generalization","author":"Zhang","year":"2022"},{"key":"10.1016\/j.artint.2026.104544_bib0055","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"8690","article-title":"Reducing domain gap by reducing style bias","author":"Nam","year":"2021"},{"key":"10.1016\/j.artint.2026.104544_bib0056","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"9619","article-title":"SelfReg: self-supervised contrastive regularization for domain generalization","author":"Kim","year":"2021"},{"key":"10.1016\/j.artint.2026.104544_bib0057","series-title":"European Conference on Computer Vision","first-page":"443","article-title":"Deep coral: correlation alignment for deep domain adaptation","author":"Sun","year":"2016"}],"container-title":["Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370226000706?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370226000706?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T02:10:17Z","timestamp":1778724617000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0004370226000706"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":57,"alternative-id":["S0004370226000706"],"URL":"https:\/\/doi.org\/10.1016\/j.artint.2026.104544","relation":{},"ISSN":["0004-3702"],"issn-type":[{"value":"0004-3702","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Learning gradient-based mixup with extrapolation toward flatter minima for domain generalization","name":"articletitle","label":"Article Title"},{"value":"Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.artint.2026.104544","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"104544"}}