{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T14:50:48Z","timestamp":1775487048232,"version":"3.50.1"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T00:00:00Z","timestamp":1759190400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T00:00:00Z","timestamp":1759190400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Chongqing Urban Management Scientific Research Project","award":["E20250107"],"award-info":[{"award-number":["E20250107"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s00371-025-04168-1","type":"journal-article","created":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T04:52:59Z","timestamp":1759207979000},"page":"12497-12510","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Efficient semantic segmentation across domains: enhancing generalization with multi-scale and simple attention modules"],"prefix":"10.1007","volume":"41","author":[{"given":"Yuan","family":"Luo","sequence":"first","affiliation":[]},{"given":"Junlei","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Zerui","family":"Yao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,30]]},"reference":[{"key":"4168_CR1","doi-asserted-by":"publisher","first-page":"102805","DOI":"10.1016\/j.cviu.2019.102805","volume":"189","author":"G Guo","year":"2019","unstructured":"Guo, G., Zhang, N.: A survey on deep learning based face recognition. Comput. Vis. Image Underst. 189, 102805 (2019)","journal-title":"Comput. Vis. Image Underst."},{"key":"4168_CR2","doi-asserted-by":"crossref","unstructured":"Rao Q, Frtunikj J.: Deep learning for self-driving cars: chances and challenges. In: Proceedings of the 1st international workshop on software engineering for AI in autonomous systems, pp. 35\u201338 (2018)","DOI":"10.1145\/3194085.3194087"},{"issue":"2","key":"4168_CR3","first-page":"63","volume":"36","author":"HR Roth","year":"2018","unstructured":"Roth, H.R., Shen, C., Oda, H., Oda, M., Hayashi, Y., Misawa, K., Mori, K.: Deep learning and its application to medical image segmentation. Med. Imaging Technol. 36(2), 63\u201371 (2018)","journal-title":"Med. Imaging Technol."},{"key":"4168_CR4","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1007\/s10994-009-5152-4","volume":"79","author":"S Ben-David","year":"2010","unstructured":"Ben-David, S., Blitzer, J., Crammer, K., Kulesza, A., Pereira, F., Vaughan, J.W.: A theory of learning from different domains. Mach. Learn. 79, 151\u2013175 (2010)","journal-title":"Mach. Learn."},{"issue":"1","key":"4168_CR5","first-page":"46","volume":"22","author":"G Blanchard","year":"2021","unstructured":"Blanchard, G., Deshmukh, A.A., Dogan, \u00dc., Lee, G., Scott, C.: Domain generalization by marginal transfer learning. J. Mach. Learn. Res. 22(1), 46\u2013100 (2021)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"4168_CR6","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1016\/j.patcog.2011.06.019","volume":"45","author":"JG Moreno-Torres","year":"2012","unstructured":"Moreno-Torres, J.G., Raeder, T., Alaiz-Rodr\u00edguez, R., Chawla, N.V., Herrera, F.: A unifying view on dataset shift in classification. Pattern Recognit. 45(1), 521\u2013530 (2012)","journal-title":"Pattern Recognit."},{"key":"4168_CR7","unstructured":"Recht, B., Roelofs, R., Schmidt, L., Shankar, V.: Do ImageNet classifiers generalize to imagenet? In: International conference on machine learning. PMLR, pp. 5389\u20135400 (2019)"},{"key":"4168_CR8","first-page":"18583","volume":"33","author":"R Taori","year":"2020","unstructured":"Taori, R., Dave, A., Shankar, V., Carlini, N., Recht, B., Schmidt, L.: Measuring robustness to natural distribution shifts in image classification. Adv. Neural Inform. Process. Syst. 33, 18583\u201318599 (2020)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"4168_CR9","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/s13735-017-0141-z","volume":"7","author":"Y Guo","year":"2018","unstructured":"Guo, Y., Liu, Y., Georgiou, T., Lew, M.S.: A review of semantic segmentation using deep neural networks. Int. J. Multimedia Inf. Retr. 7, 87\u201393 (2018)","journal-title":"Int. J. Multimedia Inf. Retr."},{"key":"4168_CR10","unstructured":"Muandet K, Balduzzi D, Sch\u00f6lkopf B.: Domain generalization via invariant feature representation. In: International conference on machine learning. PMLR, pp. 10\u201318 (2013)"},{"key":"4168_CR11","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1007\/s10462-024-10817-z","volume":"57","author":"TH Rafi","year":"2024","unstructured":"Rafi, T.H., Mahjabin, R., Ghosh, E., Ko, Y.W., Lee, J.G.: Domain generalization for semantic segmentation: a survey. Artif. Intell. Rev. 57, 247 (2024)","journal-title":"Artif. Intell. Rev."},{"key":"4168_CR12","first-page":"37","volume-title":"Computer Vision - ECCV 2024","author":"B Pak","year":"2024","unstructured":"Pak, B., Woo, B., Kim, S., Kim, D.H., Kim, H.: Textual query-driven mask transformer for domain generalized segmentation. In: Computer Vision - ECCV 2024, pp. 37\u201354. Springer, Cham (2024)"},{"key":"4168_CR13","doi-asserted-by":"publisher","first-page":"6821","DOI":"10.1109\/TMM.2022.3214776","volume":"25","author":"J Zhu","year":"2022","unstructured":"Zhu, J., Zhang, Q., Fei, L., Cai, R., Xie, Y., Sheng, B., Yang, X.: Fffn: frame-by-frame feedback fusion network for video super-resolution. IEEE Trans. Multi. 25, 6821\u20136835 (2022)","journal-title":"IEEE Trans. Multi."},{"issue":"12","key":"4168_CR14","doi-asserted-by":"publisher","first-page":"3446","DOI":"10.1109\/TMI.2021.3087857","volume":"40","author":"R Liu","year":"2021","unstructured":"Liu, R., Liu, M., Sheng, B., Li, H., Li, P., Song, H., Zhang, P., Jiang, L., Shen, D.: Nhbs-net: a feature fusion attention network for ultrasound neonatal hip bone segmentation. IEEE Trans. Med. Imaging 40(12), 3446\u20133458 (2021)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"4168_CR15","doi-asserted-by":"crossref","unstructured":"Ouyang, D., He, S., Zhang, G., Luo, M., Guo, H., Zhan, J., Huang, Z.: Efficient multi-scale attention module with cross-spatial learning. In: ICASSP 2023\u20132023 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp. 1\u20135. IEEE, (2023).","DOI":"10.1109\/ICASSP49357.2023.10096516"},{"key":"4168_CR16","unstructured":"Yang, Lingxiao, Ru-Yuan Zhang, Lida Li, and Xiaohua Xie.: Simam: A simple, parameter-free attention module for convolutional neural networks. In: International conference on machine learning, pp. 11863\u201311874. PMLR, (2021)"},{"key":"4168_CR17","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., Schiele, B.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pages 3213\u20133223. 1, 2, 3, 5, 6 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"4168_CR18","doi-asserted-by":"crossref","unstructured":"Yu, F., Chen, H., Wang, X., Xian, W., Chen, Y., Liu, F., Madhavan, V., Darrell, T.: BDD100k: A diverse driving dataset for heterogeneous multitask learning. In: CVPR 9, 4 (2020)","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"4168_CR19","doi-asserted-by":"crossref","unstructured":"Tang, Z., Gao, Y., Zhu, Y., Zhang, Z., Li, M., Metaxas, D.N.: Crossnorm and selfnorm for generalization under distribution shifts. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 52\u201361. (2021)","DOI":"10.1109\/ICCV48922.2021.00012"},{"key":"4168_CR20","doi-asserted-by":"crossref","unstructured":"Kim, J., Lee, J., Park, J., Min, D., Sohn, K.: Pin the memory: Learning to generalize semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4350\u20134360. 2, 6, 10 (2022)","DOI":"10.1109\/CVPR52688.2022.00431"},{"key":"4168_CR21","unstructured":"Richter, S.R., Vineet, V., Roth, S., Koltun, V.: Playing for data: Ground truth from computer games. In: ECCV 2, 9, 3, 5, 7 (2016)"},{"key":"4168_CR22","doi-asserted-by":"crossref","unstructured":"Choi, S., Jung, S., Yun, H., Kim, J.T., Kim, S., Choo, J.: RobustNet: Improving domain generalization in urban-scene segmentation via instance selective whitening. In: CVPR 1, 3 (2021)","DOI":"10.1109\/CVPR46437.2021.01141"},{"key":"4168_CR23","doi-asserted-by":"crossref","unstructured":"Pan, X., Luo, P., Shi, J., Tang, X.: Two at once: Enhancing learning and generalization capacities via ibn-net. In: ECCV 1, 3 (2018)","DOI":"10.1007\/978-3-030-01225-0_29"},{"key":"4168_CR24","doi-asserted-by":"crossref","unstructured":"Wei, Z., Chen, L., Jin, Y., Ma, X., Liu, T., Ling, P., Wang, B., Chen, H. and Zheng, J.: Stronger fewer & superior: Harnessing vision foundation models for domain generalized semantic segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 28619\u201328630. (2024)","DOI":"10.1109\/CVPR52733.2024.02704"},{"key":"4168_CR25","doi-asserted-by":"crossref","unstructured":"Li J, Gao M, Wei L, Tang S, Zhang W, Li M, Ji W, Tian Q, Chua T-S, Zhuang Y.: Gradientregulated meta-prompt learning for generalizable vision-language models. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 2551\u20132562 (2023)","DOI":"10.1109\/ICCV51070.2023.00241"},{"key":"4168_CR26","doi-asserted-by":"publisher","unstructured":"Wang L, Jin Y, Chen Z, Wu J, Li M, Lu Y, Wang H.: Transitive vision-language prompt learning for domain generalization. arXiv preprint. arXiv: 2404. 18758 (2024). https:\/\/doi.org\/10.48550\/arXiv.2404.18758","DOI":"10.48550\/arXiv.2404.18758"},{"key":"4168_CR27","doi-asserted-by":"publisher","unstructured":"Chen Z, Wang W, Zhao Z, Su F, Men A, Meng H.: PracticalDG: perturbation distillation on visionlanguage models for hybrid domain generalization. arXiv preprint. arXiv:2404.09011 (2024). https:\/\/doi.org\/10.48550\/arXiv.2404.09011","DOI":"10.48550\/arXiv.2404.09011"},{"key":"4168_CR28","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et al.: Learning transferable visual models from natural language supervision. In: ICML 1, 2, 3, 4, 6, 9, 10 (2021)"},{"key":"4168_CR29","doi-asserted-by":"publisher","unstructured":"Fang, Y., Sun, Q., Wang, X., Huang, T., Wang, X., Cao, Y.: EVA-02: A visual representation for neon genesis. arXiv preprint arXiv:2303.11331 (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.11331","DOI":"10.48550\/arXiv.2303.11331"},{"key":"4168_CR30","doi-asserted-by":"crossref","unstructured":"Fang, Y., Wang, W., Xie, B., Sun, Q., Wu, L., Wang, X., Huang, T., Wang, X., Cao, Y.: EVA: Exploring the limits of masked visual representation learning at scale. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01855"},{"key":"4168_CR31","unstructured":"Chen, Y., Kalantidis, Y., Li, J., Yan, S., Feng, J.: A2-nets: Double attention networks. Adv. Neural Inf. Process. Syst., 31 (2018)"},{"key":"4168_CR32","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 7794\u20137803 (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"4168_CR33","unstructured":"Tao, A., Sapra K., Catanzaro B. Hierarchical multi-scale attention for semantic segmentation. arXiv preprint arXiv:2005.10821 (2020)."},{"key":"4168_CR34","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhou, D., Feng J.: Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 13713\u201313722 (2021).","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"4168_CR35","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.-Y., and So Kweon, I. CBAM.: Convolutional Block Attention Module. In European Conference on Computer Vision, pp. 3\u201319, (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"4168_CR36","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., and Sun, G.: Squeeze-and-excitation networks. In IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141, (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"issue":"50","key":"4168_CR37","doi-asserted-by":"publisher","first-page":"11666","DOI":"10.1523\/JNEUROSCI.3414-05.2005","volume":"25","author":"BS Webb","year":"2005","unstructured":"Webb, B.S., Dhruv, N.T., Solomon, S.G., Tailby, C., Lennie, P.: Early and late mechanisms of surround suppression in striate cortex of macaque. J. Neurosci. 25(50), 11666\u201311675 (2005)","journal-title":"J. Neurosci."},{"key":"4168_CR38","doi-asserted-by":"crossref","unstructured":"Khademi, M.: Multimodal neural graph memory networks for visual question answering. In: Proceedings of the 58th annual meeting of the association for computational linguistics. pp. 7177\u20137188. (2020)","DOI":"10.18653\/v1\/2020.acl-main.643"},{"key":"4168_CR39","unstructured":"Kim, W., Son, B. and Kim, I.: Vision-and-language transformer without convolution or region supervision. In: International conference on machine learning, pp. 5583\u20135594. PMLR, (2021)"},{"key":"4168_CR40","doi-asserted-by":"crossref","unstructured":"Neuhold, G., Ollmann, T., Rota Bulo, S., Kontschieder, P.: The mapillary vistas dataset for semantic understanding of street scenes. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.534"},{"key":"4168_CR41","doi-asserted-by":"publisher","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020). https:\/\/doi.org\/10.48550\/arXiv.2010.11929","DOI":"10.48550\/arXiv.2010.11929"},{"key":"4168_CR42","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: Deformable transformers for end-to-end object detection. ICLR (2020)"},{"key":"4168_CR43","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"4168_CR44","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. ICLR (2019)"},{"key":"4168_CR45","doi-asserted-by":"publisher","unstructured":"Goyal, P., Doll\u00e1r, P., Girshick, R., Noordhuis, P., Wesolowski, L., Kyrola, A., Tulloch, A., Jia, Y., He, K.: Accurate, large minibatch sgd: Training ImageNet in 1 hour. arXiv preprint arXiv:1706.02677 (2017). https:\/\/doi.org\/10.48550\/arXiv.1706.02677","DOI":"10.48550\/arXiv.1706.02677"},{"key":"4168_CR46","doi-asserted-by":"crossref","unstructured":"Hoyer, L., Dai, D., Van Gool, L.: DAFormer: Improving network architectures and training strategies for domain-adaptive semantic segmentation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00969"},{"key":"4168_CR47","doi-asserted-by":"crossref","unstructured":"Peng, D., Lei, Y., Hayat, M., Guo, Y., Li, W.: Semantic-aware domain generalized segmentation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00262"},{"key":"4168_CR48","doi-asserted-by":"crossref","unstructured":"Lee, S., Seong, H., Lee, S., Kim, E.: WildNet: Learning domain generalized semantic segmentation from the wild. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00970"},{"key":"4168_CR49","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Zhong, Z., Zhao, N., Sebe, N., Lee, G.H.: Style-hallucinated dual consistency learning for domain generalized semantic segmentation. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19815-1_31"},{"key":"4168_CR50","doi-asserted-by":"crossref","unstructured":"Kim, S., Kim, D.h., Kim, H.: Texture learning domain randomization for domain generalized segmentation. ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00069"},{"key":"4168_CR51","doi-asserted-by":"publisher","unstructured":"Fahes, M., Vu, T.H., Bursuc, A., P\u00e9rez, P., de Charette, R.: A simple recipe for language-guided domain generalized segmentation. arXiv preprint arXiv:2311.17922 (2023). https:\/\/doi.org\/10.48550\/arXiv.2311.17922","DOI":"10.48550\/arXiv.2311.17922"},{"key":"4168_CR52","doi-asserted-by":"publisher","unstructured":"Sun, Q., Chen, H., Zheng, M., Wu, Z., Felsberg, M., Tang, Y.: IBAFormer: Intrabatch Attention Transformer for Domain Generalized Semantic Segmentation. arXiv preprint arXiv:2309.06282 (2023). https:\/\/doi.org\/10.48550\/arXiv.2309.06282","DOI":"10.48550\/arXiv.2309.06282"},{"key":"4168_CR53","doi-asserted-by":"publisher","unstructured":"H\u00fcmmer, C., Schwonberg, M., Zhong, L., Cao, H., Knoll, A., Gottschalk, H.: VLTSeg: Simple transfer of CLIP-based vision-language representations for domain generalized semantic segmentation. arXiv preprint arXiv:2312.02021 (2023). https:\/\/doi.org\/10.48550\/arXiv.2312.02021","DOI":"10.48550\/arXiv.2312.02021"},{"key":"4168_CR54","doi-asserted-by":"crossref","unstructured":"Ding, J., Xue, N., Xia, G.S., Schiele, B., Dai, D.: Hgformer: Hierarchical grouping transformer for domain generalized semantic segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 15413\u201315423 (2023).","DOI":"10.1109\/CVPR52729.2023.01479"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04168-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-025-04168-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04168-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,20]],"date-time":"2025-11-20T13:17:01Z","timestamp":1763644621000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-025-04168-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,30]]},"references-count":54,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["4168"],"URL":"https:\/\/doi.org\/10.1007\/s00371-025-04168-1","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,30]]},"assertion":[{"value":"12 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 September 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}