{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T17:06:48Z","timestamp":1772644008249,"version":"3.50.1"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T00:00:00Z","timestamp":1766534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T00:00:00Z","timestamp":1766534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"the open project fund of National Engineering Research Center of Digital Construction and Evaluation Technology of Urban Rail Transit","award":["2024023"],"award-info":[{"award-number":["2024023"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1007\/s00371-025-04307-8","type":"journal-article","created":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T18:51:04Z","timestamp":1766602264000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["SemClip: enhancing domain generalization in object detection via style enhancement and semantic consistency"],"prefix":"10.1007","volume":"42","author":[{"given":"Runyi","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoyu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hanyuan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Zou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianyong","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,24]]},"reference":[{"key":"4307_CR1","doi-asserted-by":"crossref","unstructured":"Choi, S., Jung, S., Yun, H., Kim, J.T., Kim, S., Choo, J.: Robustnet: Improving domain generalization in urban-scene segmentation via instance selective whitening. In 2021 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Jun (2021)","DOI":"10.1109\/CVPR46437.2021.01141"},{"key":"4307_CR2","doi-asserted-by":"publisher","first-page":"10479","DOI":"10.1007\/s13369-023-07661-8","volume":"48","author":"S Chowdhury","year":"2023","unstructured":"Chowdhury, S., Soni, B.: QSFVQA: a time efficient, scalable and optimized VQA framework. Arab. J. Sci. Eng. 48, 10479\u201310491 (2023)","journal-title":"Arab. J. Sci. Eng."},{"key":"4307_CR3","doi-asserted-by":"crossref","unstructured":"Chowdhury, S., Soni, B.: Beyond words: ESC-Net revolutionizes VQA by elevating visual features and defying language priors. Comput. Intell. 40(6), e70010 (2024)","DOI":"10.1111\/coin.70010"},{"key":"4307_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2024.109948","volume":"142","author":"S Chowdhury","year":"2025","unstructured":"Chowdhury, S., Soni, B.: ENVQA: Improving visual question answering model by enriching the visual feature. Eng. Appl. Artif. Intell. 142, 109948 (2025)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"4307_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2025.129906","volume":"635","author":"S Chowdhury","year":"2025","unstructured":"Chowdhury, S., Soni, B.: Handling language prior and compositional reasoning issues in visual question answering system. Neurocomputing 635, 129906 (2025)","journal-title":"Neurocomputing"},{"key":"4307_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112827","volume":"309","author":"S Chowdhury","year":"2025","unstructured":"Chowdhury, S., Soni, B.: R-VQA: A robust visual question answering model. Knowl.-Based Syst. 309, 112827 (2025)","journal-title":"Knowl.-Based Syst."},{"key":"4307_CR7","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M.G.H., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., Schiele, B.: The cityscapes dataset for semantic urban scene understanding. arXiv: Computer Vision and pattern recognition,arXiv: Computer Vision and Pattern Recognition, Apr (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"4307_CR8","doi-asserted-by":"crossref","unstructured":"Desai, K., Johnson, J.: Virtex: Learning visual representations from textual annotations. In 2021 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Jun (2021)","DOI":"10.1109\/CVPR46437.2021.01101"},{"key":"4307_CR9","doi-asserted-by":"crossref","unstructured":"Engilberge, M., Chevallier, L., P\u00e9rez, P., Cord, M.: Finding beans in burgers: Deep semantic-visual embedding with localization. arXiv: Computer vision and pattern recognition,arXiv: Computer Vision and Pattern Recognition, Apr (2018)","DOI":"10.1109\/CVPR.2018.00419"},{"key":"4307_CR10","unstructured":"Faghri, F., Fleet, D.J., Kiros, J., Fidler, S.: Vse++: Improving visual-semantic embeddings with hard negatives. arXiv: Learning,arXiv: Learning, Jul (2017)"},{"key":"4307_CR11","unstructured":"Furuta, R., Sato, Y.: Semi-and weakly-supervised domain generalization for object detection"},{"key":"4307_CR12","doi-asserted-by":"crossref","unstructured":"Hassaballah, M., Kenk, M.A., Muhammad, K., Minaee, S.: Vehicle detection and tracking in adverse weather using a deep learning framework. In: IEEE transactions on intelligent transportation systems, page 4230\u20134242, Jul (2021)","DOI":"10.1109\/TITS.2020.3014013"},{"key":"4307_CR13","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), Jun (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"4307_CR14","unstructured":"He, Z., Ni, H.: Single-domain generalized object detection by balancing domain diversity and invariance. arXiv preprint, arXiv:2502.03835, (2025). 6"},{"key":"4307_CR15","doi-asserted-by":"crossref","unstructured":"Huang, L., Zhou, Y., Zhu, F., Liu, L., Shao, L.: Iterative normalization: beyond standardization towards efficient whitening. Cornell University - arXiv, Cornell University - arXiv Apr (2019)","DOI":"10.1109\/CVPR.2019.00501"},{"key":"4307_CR16","doi-asserted-by":"crossref","unstructured":"Huang, X., Belongie, S.: Arbitrary style transfer in real-time with adaptive instance normalization. In: 2017 IEEE international conference on computer vision (ICCV), Oct (2017)","DOI":"10.1109\/ICCV.2017.167"},{"key":"4307_CR17","unstructured":"Kiros, R., Salakhutdinov, R., Zemel, R.S.: Unifying visual-semantic embeddings with multimodal neural language models. Cornell University - arXiv,Cornell University - arXiv, Nov (2014)"},{"key":"4307_CR18","doi-asserted-by":"crossref","unstructured":"Lei, X., Wen, X., Li, Z.: A multi-target cow face detection model in complex scenes. The Visual Computer, (2024)","DOI":"10.1007\/s00371-024-03301-w"},{"key":"4307_CR19","doi-asserted-by":"crossref","unstructured":"Li, D., Wu, A., Wang, Y., Han, Y.: Prompt-driven dynamic object-centric learning for single domain generalization. In: 2024 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pages 17606\u201317615, (2024)","DOI":"10.1109\/CVPR52733.2024.01667"},{"key":"4307_CR20","unstructured":"Li, G., Duan, N., Fang, Y., Gong, M., Jiang, D., Zhou, M.: Unicoder-vl: A universal encoder for vision and language by cross-modal pre-training"},{"key":"4307_CR21","unstructured":"Liu, Y., Zhou, S., Liu, X., Hao, C., Fan, B., Tian, J.: Unbiased faster r-cnn for single-source domain generalized object detection"},{"key":"4307_CR22","unstructured":"Jing, L., Batra, D., Parikh, D., Lee, S.: Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: Neural Information processing systems, neural information processing systems Aug (2019)"},{"key":"4307_CR23","doi-asserted-by":"crossref","unstructured":"Pan, X., Luo, P., Shi, J., Tang, X.: Two at once: enhancing learning and generalization capacities via IBN-Net, page 484\u2013500. Jan (2018)","DOI":"10.1007\/978-3-030-01225-0_29"},{"key":"4307_CR24","doi-asserted-by":"crossref","unstructured":"Pan, X., Zhan, X., Shi, J., Tang, X., Luo, P.: Switchable whitening for deep representation learning. In: 2019 IEEE\/CVF international conference on computer vision (ICCV), Oct (2019)","DOI":"10.1109\/ICCV.2019.00195"},{"key":"4307_CR25","unstructured":"Radford, A., Kim, J., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Amanda, A., Mishkin, P., Clark, J., Krueger, G., Sutskever, I.: Learning transferable visual models from natural language supervision. Cornell University - arXiv,Cornell University - arXiv, Feb (2021)"},{"key":"4307_CR26","unstructured":"Radford, A., Kim, J., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Amanda, A., Mishkin, P., Clark, J., Krueger, G., Sutskever, I.: Learning transferable visual models from natural language supervision. Cornell University - arXiv,Cornell University - arXiv, Feb (2021)"},{"key":"4307_CR27","unstructured":"Ramesh, A.: Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. Zero-shot text-to-image generation. Cornell University - arXiv, Cornell University - arXiv Feb (2021)"},{"key":"4307_CR28","doi-asserted-by":"crossref","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. In: IEEE transactions on pattern analysis and machine intelligence, page 1137\u20131149, Jun (2017)","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"4307_CR29","doi-asserted-by":"crossref","unstructured":"Sakaridis, C., Dai, D., Van\u00a0Gool, L.: Semantic foggy scene understanding with synthetic data. In: International journal of computer vision, page 973\u2013992, Sep (2018)","DOI":"10.1007\/s11263-018-1072-8"},{"key":"4307_CR30","doi-asserted-by":"crossref","unstructured":"Vidit, V., Engilberge, M., Salzmann, M.: Clip the gap: A single domain generalization approach for object detection. In: 2023 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pages 3219\u20133229, (2023)","DOI":"10.1109\/CVPR52729.2023.00314"},{"key":"4307_CR31","doi-asserted-by":"crossref","unstructured":"Wu, A., Deng, C.: Single-domain generalized object detection in urban scene via cyclic-disentangled self-distillation. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pages 837\u2013846, (2022) https:\/\/doi.org\/10.1109\/CVPR52688.2022.00092","DOI":"10.1109\/CVPR52688.2022.00092"},{"key":"4307_CR32","doi-asserted-by":"crossref","unstructured":"Wu., A., Li, R., Han, Y., Zhu, L., Yang, Y.: Vector-decomposed disentanglement for domain-invariant object detection. Cornell University - arXiv, Cornell University - arXiv Aug (2021)","DOI":"10.1109\/ICCV48922.2021.00921"},{"key":"4307_CR33","unstructured":"Wu, Z., Chen, X., Pan, Z., Liu, X., Liu, W., Dai, D., Gao, H., Ma, Y., Wu, C., Wang, B., Xie, Z., Wu, Y., Hu, K., Wang, J., Sun, Y., Li, Y., Piao, Y., Guan, K., Liu, A., Xie, X., You, Y., Dong, K., Yu, X., Zhang, H., Zhao, L., Wang, Y., Ruan, C.: Deepseek-vl2 :mixture-of-experts vision-language models for advanced multimodal understanding"},{"issue":"1","key":"4307_CR34","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1007\/s00371-023-02789-y","volume":"40","author":"F Xin","year":"2024","unstructured":"Xin, F., Zhang, H., Pan, H.: Hybrid dilated multilayer faster RCNN for object detection. Vis. Comput. 40(1), 393\u2013406 (2024)","journal-title":"Vis. Comput."},{"key":"4307_CR35","doi-asserted-by":"crossref","unstructured":"Yu, F., Chen, H., Wang, X., Xian, W., Chen, Y., Liu, F., Madhavan, V., Darrell, T.: Bdd100k: A diverse driving dataset for heterogeneous multitask learning. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Jun (2020)","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"4307_CR36","unstructured":"Zhang, Y., Jiang, H., Miura, Y., Manning, C.D., Langlotz, C.P.: Contrastive learning of medical visual representations from paired images and text. Cornell University - arXiv,Cornell University - arXiv, Oct (2020)"},{"key":"4307_CR37","unstructured":"Zhao, Y., Zhong, Z., Zhao, N., Sebe, N.: and GimHee Lee. A unified framework for visual domain generalization, Style-hallucinated dual consistency learning Dec (2022)"},{"key":"4307_CR38","doi-asserted-by":"crossref","unstructured":"Zhong, Y., Yang, J., Zhang, P., Li, C., Codella, N., Li, L.H., Zhou, L., Dai, X., Yuan, L., Li, Y., Gao, J.: Regionclip: Region-based language-image pretraining. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), Jun (2022)","DOI":"10.1109\/CVPR52688.2022.01629"},{"key":"4307_CR39","unstructured":"Zhong, Z.,x Zhao, Z., Lee, G., Sebe, N.: Adversarial style augmentation for domain generalized urban-scene segmentation. Jul (2022)"},{"key":"4307_CR40","unstructured":"Zhou, K., Yang, Y., Qiao, Y., Xiang, T.: Domain generalization with mixstyle. Learning, Learning Apr (2021)"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04307-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-025-04307-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04307-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T13:02:59Z","timestamp":1772629379000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-025-04307-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,24]]},"references-count":40,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["4307"],"URL":"https:\/\/doi.org\/10.1007\/s00371-025-04307-8","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,24]]},"assertion":[{"value":"6 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"94"}}