{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T18:03:34Z","timestamp":1779991414666,"version":"3.53.1"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,29]]},"DOI":"10.1145\/3774905.3795844","type":"proceedings-article","created":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T17:14:56Z","timestamp":1779988496000},"page":"1366-1372","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Sparse Causal Latent Features for Robust Multimodal Learning under Distribution Shifts"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2880-3097","authenticated-orcid":false,"given":"Liang","family":"Cao","sequence":"first","affiliation":[{"name":"Massachusetts Institute of Technology, CAMBRIDGE, MA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9855-4479","authenticated-orcid":false,"given":"Weide","family":"Liu","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4184-9022","authenticated-orcid":false,"given":"Yan","family":"Qin","sequence":"additional","affiliation":[{"name":"School of Automation, Chongqing University, Chongqing, Chongqing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1719-0328","authenticated-orcid":false,"given":"Zhenghua","family":"Chen","sequence":"additional","affiliation":[{"name":"The Agency for Science, Technology and Research, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0563-1760","authenticated-orcid":false,"given":"Zhuo","family":"Chen","sequence":"additional","affiliation":[{"name":"PengCheng Laboratory, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4321-0468","authenticated-orcid":false,"given":"Bhushan","family":"Gopaluni","sequence":"additional","affiliation":[{"name":"University of British Columbia, Vancouver, BC, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,5,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-020-00257-z"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1334"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00522"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2024.3412006"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3382880"},{"key":"e_1_3_2_1_6_1","volume-title":"Invariant risk minimization. arXiv preprint arXiv:1907.02893","author":"Arjovsky Martin","year":"2019","unstructured":"Martin Arjovsky, L\u00e9on Bottou, Ishaan Gulrajani, and David Lopez-Paz. 2019. Invariant risk minimization. arXiv preprint arXiv:1907.02893 (2019)."},{"key":"e_1_3_2_1_7_1","volume-title":"International Conference on Learning Representations.","author":"Sagawa Shiori","year":"2020","unstructured":"Shiori Sagawa, Pang Wei Koh, Tatsunori B. Hashimoto, and Percy Liang. 2020. Distributionally robust neural networks for group shifts: On the importance of regularization for worst-case generalization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_8_1","volume-title":"International Conference on Learning Representations.","author":"Gulrajani Ishaan","year":"2021","unstructured":"Ishaan Gulrajani and David Lopez-Paz. 2021. In search of lost domain generalization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_9_1","volume-title":"International Conference on Learning Representations.","author":"Rosenfeld Elan","year":"2021","unstructured":"Elan Rosenfeld, Pradeep Ravikumar, and Andrej Risteski. 2021. The risks of invariant risk minimization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_10_1","first-page":"3438","article-title":"Invariance principle meets information bottleneck for out-of-distribution generalization","volume":"34","author":"Ahuja Kartik","year":"2021","unstructured":"Kartik Ahuja, Ethan Caballero, Dinghuai Zhang, Jean-Christophe Gagnon-Audet, Yoshua Bengio, Ioannis Mitliagkas, and Irina Rish. 2021. Invariance principle meets information bottleneck for out-of-distribution generalization. In Advances in Neural Information Processing Systems, Vol. 34. 3438-3450.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00804"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the 29th International Conference on Machine Learning (ICML).","author":"Sch\u00f6lkopf Bernhard","year":"2012","unstructured":"Bernhard Sch\u00f6lkopf, Dominik Janzing, Jonas Peters, Eleni Sgouritsa, Kun Zhang, and Joris Mooij. 2012. On causal and anticausal learning. In Proceedings of the 29th International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_13_1","volume-title":"Elements of Causal Inference: Foundations and Learning Algorithms","author":"Peters Jonas","unstructured":"Jonas Peters, Dominik Janzing, and Bernhard Sch\u00f6lkopf. 2017. Elements of Causal Inference: Foundations and Learning Algorithms. MIT Press."},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics (AISTATS). 859-868","author":"Hyv\u00e4rinen Aapo","year":"2019","unstructured":"Aapo Hyv\u00e4rinen, Hiroaki Sasaki, and Richard Turner. 2019. Nonlinear ICA using auxiliary variables and generalized contrastive learning. In Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics (AISTATS). 859-868."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics (AISTATS). 2207-2217","author":"Khemakhem Ilyes","year":"2020","unstructured":"Ilyes Khemakhem, Diederik P. Kingma, Ricardo Monti, and Aapo Hyv\u00e4rinen. 2020. Variational autoencoders and nonlinear ICA: A unifying framework. In Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics (AISTATS). 2207-2217."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1129"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.2517-6161.1996.tb02080.x"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the 5th International Conference on Learning Representations (ICLR).","author":"Jang Eric","year":"2017","unstructured":"Eric Jang, Shixiang Gu, and Ben Poole. 2017. Categorical reparameterization with Gumbel-softmax. In Proceedings of the 5th International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the 5th International Conference on Learning Representations (ICLR).","author":"Maddison Chris J.","year":"2017","unstructured":"Chris J. Maddison, Andriy Mnih, and Yee Whye Teh. 2017. The concrete distribution: A continuous relaxation of discrete random variables. In Proceedings of the 5th International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1111\/rssb.12167"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2021.3058954"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3755653"},{"key":"e_1_3_2_1_23_1","volume-title":"Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432","author":"Bengio Yoshua","year":"2013","unstructured":"Yoshua Bengio, Nicholas L\u00e9onard, and Aaron Courville. 2013. Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432 (2013)."},{"key":"e_1_3_2_1_24_1","first-page":"1","article-title":"Domain-adversarial training of neural networks","volume":"17","author":"Ganin Yaroslav","year":"2016","unstructured":"Yaroslav Ganin, Evgeniya Ustinova, Hana Ajakan, Pascal Germain, Hugo Larochelle, Fran\u00e7ois Laviolette, Mario Marchand, and Victor S. Lempitsky. 2016. Domain-adversarial training of neural networks. Journal of Machine Learning Research 17, 59 (2016), 1-35.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (ICML). 5815-5826","author":"Krueger David","year":"2021","unstructured":"David Krueger, Ethan Caballero, Joern-Henrik Jacobsen, Amy Zhang, Jonathan Binas, Dinghuai Zhang, R\u00e9mi Le Priol, and Aaron Courville. 2021. Out-of-distribution generalization via risk extrapolation (REx). In Proceedings of the 38th International Conference on Machine Learning (ICML). 5815-5826."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-49409-8_35"},{"key":"e_1_3_2_1_27_1","first-page":"723","volume-title":"Journal of Machine Learning Research","author":"Gretton Arthur","year":"2012","unstructured":"Arthur Gretton, Karsten M. Borgwardt, Malte J. Rasch, Bernhard Sch\u00f6lkopf, and Alexander J. Smola. 2012. A kernel two-sample test. Journal of Machine Learning Research 13, Mar (2012), 723-773."},{"key":"e_1_3_2_1_28_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Zhang Hongyi","year":"2018","unstructured":"Hongyi Zhang, Moustapha Cisse, Yann N. Dauphin, and David Lopez-Paz. 2018. mixup: Beyond empirical risk minimization. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_29_1","first-page":"1256","article-title":"Gradient starvation: A learning proclivity in neural networks","volume":"34","author":"Pezeshki Mohammad","year":"2021","unstructured":"Mohammad Pezeshki, S\u00e9kou-Oumar Kaba, Yoshua Bengio, Aaron Courville, Doina Precup, and Guillaume Lajoie. 2021. Gradient starvation: A learning proclivity in neural networks. In Advances in Neural Information Processing Systems (NeurIPS), Vol. 34. 1256-1272.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"}],"event":{"name":"WWW '26: The ACM Web Conference 2026","location":"Dubai United Arab Emirates","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Companion Proceedings of the ACM Web Conference 2026"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3774905.3795844","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T17:17:41Z","timestamp":1779988661000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3774905.3795844"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,28]]},"references-count":29,"alternative-id":["10.1145\/3774905.3795844","10.1145\/3774905"],"URL":"https:\/\/doi.org\/10.1145\/3774905.3795844","relation":{},"subject":[],"published":{"date-parts":[[2026,5,28]]},"assertion":[{"value":"2026-05-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}