{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:18:28Z","timestamp":1771949908432,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"The Science and Technology Major Project of Guangxi","award":["AA22096030 and AA22096032"],"award-info":[{"award-number":["AA22096030 and AA22096032"]}]},{"name":"National Key R&D Program of China under Grant","award":["2020AAA0109500 and 2020AAA0109501"],"award-info":[{"award-number":["2020AAA0109500 and 2020AAA0109501"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680906","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"8159-8168","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["Learning Spectral-Decomposited Tokens for Domain Generalized Semantic Segmentation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4249-3021","authenticated-orcid":false,"given":"Jingjun","family":"Yi","sequence":"first","affiliation":[{"name":"Wuhan University, Wuhan, Hubei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1047-4790","authenticated-orcid":false,"given":"Qi","family":"Bi","sequence":"additional","affiliation":[{"name":"Wuhan University, Wuhan, Hubei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7193-6242","authenticated-orcid":false,"given":"Hao","family":"Zheng","sequence":"additional","affiliation":[{"name":"Tencent Youtu Lab, Shenzhen, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4733-381X","authenticated-orcid":false,"given":"Haolan","family":"Zhan","sequence":"additional","affiliation":[{"name":"Monash University, Clayton, Victoria, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4059-5902","authenticated-orcid":false,"given":"Wei","family":"Ji","sequence":"additional","affiliation":[{"name":"Yale University, New Haven, Connecticut, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9569-269X","authenticated-orcid":false,"given":"Yawen","family":"Huang","sequence":"additional","affiliation":[{"name":"Tencent Youtu Lab, Shenzhen, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8076-2619","authenticated-orcid":false,"given":"Yuexiang","family":"Li","sequence":"additional","affiliation":[{"name":"Guangxi Medical University, Nanning, Guangxi, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2195-2847","authenticated-orcid":false,"given":"Yefeng","family":"Zheng","sequence":"additional","affiliation":[{"name":"Tencent Youtu Lab &amp; Westlake University, Shenzhen, Guangdong, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27839"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3290469"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27840"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27838"},{"key":"e_1_3_2_1_5_1","volume-title":"The Fast Fourier Transform and Its Applications","author":"Brigham E Oran","unstructured":"E Oran Brigham. 1988. The Fast Fourier Transform and Its Applications. Prentice-Hall, Inc."},{"key":"e_1_3_2_1_6_1","first-page":"5516","article-title":"Self-supervised learning across domains","volume":"44","author":"Bucci Silvia","year":"2021","unstructured":"Silvia Bucci, Antonio D'Innocente, Yujun Liao, Fabio M Carlucci, Barbara Caputo, and Tatiana Tommasi. 2021. Self-supervised learning across domains. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 9 (2021), 5516--5528.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_7_1","volume-title":"Comparison of Spectral Decomposition Methods. First break","author":"Castagna John P","year":"2006","unstructured":"John P Castagna and Shengjie Sun. 2006. Comparison of Spectral Decomposition Methods. First break, Vol. 24, 3 (2006)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00051"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00742"},{"key":"e_1_3_2_1_10_1","first-page":"16664","article-title":"Adaptformer: Adapting Vision Transformers for Scalable Visual Recognition","volume":"35","author":"Chen Shoufa","year":"2022","unstructured":"Shoufa Chen, Chongjian Ge, Zhan Tong, Jiangliu Wang, Yibing Song, Jue Wang, and Ping Luo. 2022. Adaptformer: Adapting Vision Transformers for Scalable Visual Recognition. Advances in Neural Information Processing Systems, Vol. 35 (2022), 16664--16678.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109086"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01713"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01141"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00343"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"e_1_3_2_1_17_1","volume-title":"C Mohan, Abhinav Kumar, and Vineeth N Balasubramanian.","author":"Dayal Aveen","year":"2024","unstructured":"Aveen Dayal, Vimal KB, Linga Reddy Cenkeramaddi, C Mohan, Abhinav Kumar, and Vineeth N Balasubramanian. 2024. MADG: Margin-based Adversarial Learning for Domain Generalization. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01479"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01855"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_1_21_1","volume-title":"Lora: Low-Rank Adaptation of Large Language Models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-Rank Adaptation of Large Language Models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00682"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00501"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00299"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-023-1385-0"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611738"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00112"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01216"},{"key":"e_1_3_2_1_29_1","volume-title":"Visual Prompt Tuning. In European Conference on Computer Vision. Springer, 709--727","author":"Jia Menglin","year":"2022","unstructured":"Menglin Jia, Luming Tang, Bor-Chun Chen, Claire Cardie, Serge Belongie, Bharath Hariharan, and Ser-Nam Lim. 2022. Visual Prompt Tuning. In European Conference on Computer Vision. Springer, 709--727."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00948"},{"key":"e_1_3_2_1_31_1","volume-title":"Segment Anything. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 4015--4026","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alexander C Berg, Wan-Yen Lo, et al. 2023. Segment Anything. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 4015--4026."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01133"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00970"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00058"},{"key":"e_1_3_2_1_35_1","volume-title":"Adversarial style mining for one-shot unsupervised domain adaptation. Advances in neural information processing systems","author":"Luo Yawei","year":"2020","unstructured":"Yawei Luo, Ping Liu, Tao Guan, Junqing Yu, and Yi Yang. 2020. Adversarial style mining for one-shot unsupervised domain adaptation. Advances in neural information processing systems, Vol. 33 (2020), 20612--20623."},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 3001--3011","author":"Mirza M Jehanzeb","year":"2022","unstructured":"M Jehanzeb Mirza, Marc Masana, Horst Possegger, and Horst Bischof. 2022. An Efficient Domain-Incremental Learning Approach to Drive in All Weather Conditions. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 3001--3011."},{"key":"e_1_3_2_1_37_1","volume-title":"Advances in Neural Information Processing Systems","volume":"31","author":"Nam Hyeonseob","year":"2018","unstructured":"Hyeonseob Nam and Hyo-Eun Kim. 2018. Batch-Instance Normalization for Adaptively Style-Invariant Neural Networks. Advances in Neural Information Processing Systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.534"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00281"},{"key":"e_1_3_2_1_40_1","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby et al. 2023. Dinov2: Learning Robust Visual Features without Supervision. arXiv preprint arXiv:2304.07193 (2023)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20098"},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of the European Conference on Computer Vision. 464--479","author":"Pan X.","unstructured":"X. Pan, P. Luo, J. Shi, and X. Tang. 2018. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. In Proceedings of the European Conference on Computer Vision. 464--479."},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 1863--1871","author":"Pan X.","unstructured":"X. Pan, X. Zhan, J. Shi, X. Tang, and P. Luo. 2019. Switchable Whitening for Deep Representation Learning. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 1863--1871."},{"key":"e_1_3_2_1_44_1","volume-title":"Interpretational Applications of Spectral Decomposition in Reservoir Characterization. The leading edge","author":"Partyka Greg","year":"1999","unstructured":"Greg Partyka, James Gridley, and John Lopez. 1999. Interpretational Applications of Spectral Decomposition in Reservoir Characterization. The leading edge, Vol. 18, 3 (1999), 353--360."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00262"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3096334"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1068\/p110337"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00057"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01526"},{"key":"e_1_3_2_1_50_1","volume-title":"International Conference on Machine Learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning Transferable Visual Models from Natural Language Supervision. In International Conference on Machine Learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_51_1","volume-title":"Zero-Shot Text-to-Image Generation. In International Conference on Machine Learning. PMLR, 8821--8831","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation. In International Conference on Machine Learning. PMLR, 8821--8831."},{"key":"e_1_3_2_1_52_1","volume-title":"European Conference on Computer Vision (ECCV)","volume":"9906","author":"Richter S. R.","unstructured":"S. R. Richter, V. Vineet, S. Roth, and V. Koltun. 2016. Playing for data: Ground truth from computer games. In European Conference on Computer Vision (ECCV), Vol. 9906. 102--118."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.352"},{"key":"e_1_3_2_1_54_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV).","author":"Sakaridis C.","unstructured":"C. Sakaridis, D. Dai, and L. Van Gool. 2021. ACDC: The Adverse Conditions Dataset with Correspondences for Semantic Driving Scene Understanding. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109115"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02353"},{"key":"e_1_3_2_1_57_1","volume-title":"Advances in Neural Information Processing Systems","volume":"31","author":"Volpi Riccardo","year":"2018","unstructured":"Riccardo Volpi, Hongseok Namkoong, Ozan Sener, John C Duchi, Vittorio Murino, and Silvio Savarese. 2018. Generalizing to Unseen Domains via Adversarial Data Augmentation. Advances in Neural Information Processing Systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548267"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00756"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02704"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20193"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109474"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01415"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00414"},{"key":"e_1_3_2_1_65_1","volume-title":"BDD100k: A Diverse Driving Video Database with Scalable Annotation Tooling. arXiv preprint arXiv:1805.04687","author":"Yu Fisher","year":"2018","unstructured":"Fisher Yu, Wenqi Xian, Yingying Chen, Fangchen Liu, Mike Liao, Vashisht Madhavan, and Trevor Darrell. 2018. BDD100k: A Diverse Driving Video Database with Scalable Annotation Tooling. arXiv preprint arXiv:1805.04687, Vol. 2, 5 (2018), 6."},{"key":"e_1_3_2_1_66_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 2100--2110","author":"Yue X.","unstructured":"X. Yue, Y. Zhang, S. Zhao, A. Sangiovanni-Vincentelli, K. Keutzer, and B. Gong. 2019. Domain Randomization and Pyramid Consistency: Simulation-to-Real Generalization Without Accessing Target Domain Data. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 2100--2110."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26320"},{"key":"e_1_3_2_1_68_1","volume-title":"Test-time Fourier Style Calibration for Domain Generalization. In 31st International Joint Conference on Artificial Intelligence, IJCAI","author":"Zhao Xingchen","year":"2022","unstructured":"Xingchen Zhao, Chang Liu, Anthony Sicilia, Seong Jae Hwang, and Yun Fu. 2022. Test-time Fourier Style Calibration for Domain Generalization. In 31st International Joint Conference on Artificial Intelligence, IJCAI 2022. 1721--1727."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00239"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19815-1_31"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i19.30184"},{"key":"e_1_3_2_1_72_1","volume-title":"Gim Hee Lee, and Nicu Sebe","author":"Zhong Zhun","year":"2022","unstructured":"Zhun Zhong, Yuyang Zhao, Gim Hee Lee, and Nicu Sebe. 2022. Adversarial Style Augmentation for Domain Generalized Urban-Scene Segmentation. In Advances in Neural Information Processing Systems."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680906","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680906","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:33Z","timestamp":1750295853000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680906"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":72,"alternative-id":["10.1145\/3664647.3680906","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680906","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}