{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T19:20:50Z","timestamp":1778613650375,"version":"3.51.4"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U20A20185"],"award-info":[{"award-number":["U20A20185"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U20A20185"],"award-info":[{"award-number":["U20A20185"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U20A20185"],"award-info":[{"award-number":["U20A20185"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U20A20185"],"award-info":[{"award-number":["U20A20185"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U20A20185"],"award-info":[{"award-number":["U20A20185"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100021171","name":"Guangdong Basic and Applied Basic Research Foundation","doi-asserted-by":"crossref","award":["2022B1515020103"],"award-info":[{"award-number":["2022B1515020103"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100021171","name":"Guangdong Basic and Applied Basic Research Foundation","doi-asserted-by":"crossref","award":["2022B1515020103"],"award-info":[{"award-number":["2022B1515020103"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100021171","name":"Guangdong Basic and Applied Basic Research Foundation","doi-asserted-by":"crossref","award":["2022B1515020103"],"award-info":[{"award-number":["2022B1515020103"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100021171","name":"Guangdong Basic and Applied Basic Research Foundation","doi-asserted-by":"crossref","award":["2022B1515020103"],"award-info":[{"award-number":["2022B1515020103"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100021171","name":"Guangdong Basic and Applied Basic Research Foundation","doi-asserted-by":"crossref","award":["2022B1515020103"],"award-info":[{"award-number":["2022B1515020103"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Shenzhen Science and Technology Program","award":["RCYX20200714114641140"],"award-info":[{"award-number":["RCYX20200714114641140"]}]},{"name":"Shenzhen Science and Technology Program","award":["RCYX20200714114641140"],"award-info":[{"award-number":["RCYX20200714114641140"]}]},{"name":"Shenzhen Science and Technology Program","award":["RCYX20200714114641140"],"award-info":[{"award-number":["RCYX20200714114641140"]}]},{"name":"Shenzhen Science and Technology Program","award":["RCYX20200714114641140"],"award-info":[{"award-number":["RCYX20200714114641140"]}]},{"name":"Shenzhen Science and Technology Program","award":["RCYX20200714114641140"],"award-info":[{"award-number":["RCYX20200714114641140"]}]},{"name":"Major Key Project of PCL","award":["PCL2024A04"],"award-info":[{"award-number":["PCL2024A04"]}]},{"name":"Major Key Project of PCL","award":["PCL2024A04"],"award-info":[{"award-number":["PCL2024A04"]}]},{"name":"Major Key Project of PCL","award":["PCL2024A04"],"award-info":[{"award-number":["PCL2024A04"]}]},{"name":"Major Key Project of PCL","award":["PCL2024A04"],"award-info":[{"award-number":["PCL2024A04"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s00371-025-03952-3","type":"journal-article","created":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T12:46:36Z","timestamp":1748263596000},"page":"6533-6546","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Energy-guided test-time adaptation for data shifts in multi-modal perception"],"prefix":"10.1007","volume":"41","author":[{"given":"Yun","family":"Pei","sequence":"first","affiliation":[]},{"given":"Lingbo","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Runqing","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Ye","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Pengpeng","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Liang","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Yulan","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,26]]},"reference":[{"key":"3952_CR1","unstructured":"Chen, Z., Meng, J., Baktashmotlagh, M., Zhang, Y., Huang, Z., Luo, Y.: Mos: Model synergy for test-time adaptation on lidar-based 3d object detection. arXiv preprint arXiv:2406.14878 (2024)"},{"key":"3952_CR2","doi-asserted-by":"crossref","unstructured":"Chen, Z., Pan, Y., Ye, Y., Lu, M., Xia, Y.: Each test image deserves a specific prompt: Continual test-time adaptation for 2d medical image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11184\u201311193 (2024)","DOI":"10.1109\/CVPR52733.2024.01063"},{"key":"3952_CR3","doi-asserted-by":"publisher","first-page":"3924","DOI":"10.1109\/TIP.2023.3280389","volume":"32","author":"J Dang","year":"2023","unstructured":"Dang, J., Zheng, H., Lai, J., Yan, X., Guo, Y.: Efficient and robust video object segmentation through isogenous memory sampling and frame relation mining. IEEE Trans. Image Process. 32, 3924\u20133938 (2023)","journal-title":"IEEE Trans. Image Process."},{"key":"3952_CR4","doi-asserted-by":"crossref","unstructured":"Gao, Z., Zhang, X.Y., Liu, C.L.: Unified entropy optimization for open-set test-time adaptation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23975\u201323984 (2024)","DOI":"10.1109\/CVPR52733.2024.02263"},{"key":"3952_CR5","unstructured":"Gong, Y., Rouditchenko, A., Liu, A.H., Harwath, D., Karlinsky, L., Kuehne, H., Glass, J.: Contrastive audio-visual masked autoencoder. arXiv preprint arXiv:2210.07839 (2022)"},{"key":"3952_CR6","unstructured":"Guo, Z., Jin, T.: Smoothing the shift: Towards stable test-time adaptation under complex multimodal noises. In: The Thirteenth International Conference on Learning Representations (2025). https:\/\/openreview.net\/forum?id=rObkvzJxTG"},{"key":"3952_CR7","unstructured":"Han, B., Yao, Q., Yu, X., Niu, G., Xu, M., Hu, W., Tsang, I., Sugiyama, M.: Co-teaching: Robust training of deep neural networks with extremely noisy labels. Advances in neural information processing systems 31 (2018)"},{"key":"3952_CR8","doi-asserted-by":"crossref","unstructured":"Hegde, D., Kilic, V., Sindagi, V., Cooper, A.B., Foster, M., Patel, V.M.: Source-free unsupervised domain adaptation for 3d object detection in adverse weather. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 6973\u20136980. IEEE (2023)","DOI":"10.1109\/ICRA48891.2023.10161341"},{"key":"3952_CR9","unstructured":"Hendrycks, D., Dietterich, T.: Benchmarking neural network robustness to common corruptions and perturbations. arXiv preprint arXiv:1903.12261 (2019)"},{"key":"3952_CR10","doi-asserted-by":"crossref","unstructured":"Hong, Y., Zheng, Z., Chen, P., Wang, Y., Li, J., Gan, C.: Multiply: A multisensory object-centric embodied large language model in 3d world. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 26406\u201326416 (2024)","DOI":"10.1109\/CVPR52733.2024.02494"},{"key":"3952_CR11","doi-asserted-by":"crossref","unstructured":"Hu, S., Gong, Y., Mori, G.: Embodied human activity recognition. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 6447\u20136457 (2024)","DOI":"10.1109\/WACV57701.2024.00632"},{"key":"3952_CR12","doi-asserted-by":"crossref","unstructured":"Huang, Y., Li, X., Zhou, Z., Wang, Y., He, Z., Yang, M.H.: Rtracker: Recoverable tracking via pn tree structured memory. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19038\u201319047 (2024)","DOI":"10.1109\/CVPR52733.2024.01801"},{"key":"3952_CR13","doi-asserted-by":"crossref","unstructured":"Iffath, F., Gavrilova, M.: Arf-net: a multi-modal aesthetic attention-based fusion. The Visual Computer pp. 1\u201313 (2024)","DOI":"10.1007\/s00371-024-03492-2"},{"key":"3952_CR14","doi-asserted-by":"crossref","unstructured":"Karmanov, A., Guan, D., Lu, S., El\u00a0Saddik, A., Xing, E.: Efficient test-time adaptation of vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14162\u201314171 (2024)","DOI":"10.1109\/CVPR52733.2024.01343"},{"key":"3952_CR15","unstructured":"Kay, W., Carreira, J., Simonyan, K., Zhang, B., Hillier, C., Vijayanarasimhan, S., Viola, F., Green, T., Back, T., Natsev, P., et\u00a0al.: The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)"},{"key":"3952_CR16","doi-asserted-by":"crossref","unstructured":"Kong, L., Liu, Y., Li, X., Chen, R., Zhang, W., Ren, J., Pan, L., Chen, K., Liu, Z.: Robo3d: Towards robust and reliable 3d perception against corruptions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 19994\u201320006 (2023)","DOI":"10.1109\/ICCV51070.2023.01830"},{"key":"3952_CR17","unstructured":"Lee, J., Jung, D., Lee, S., Park, J., Shin, J., Hwang, U., Yoon, S.: Entropy is not enough for test-time adaptation: From the perspective of disentangled factors. arXiv preprint arXiv:2403.07366 (2024)"},{"key":"3952_CR18","doi-asserted-by":"crossref","unstructured":"Lee, S.U., Hofmann, A., Williams, B.: A model-based human activity recognition for human\u2013robot collaboration. In: 2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 736\u2013743. IEEE (2019)","DOI":"10.1109\/IROS40897.2019.8967650"},{"key":"3952_CR19","unstructured":"Li, H., Hu, P., Zhang, Q., Peng, X., XitingLiu, Yang, M.: Test-time adaptation for cross-modal retrieval with query shift. In: The Thirteenth International Conference on Learning Representations (2025). https:\/\/openreview.net\/forum?id=BmG88rONaU"},{"key":"3952_CR20","doi-asserted-by":"crossref","unstructured":"Li, X., Du, Z., Li, J., Zhu, L., Lu, K.: Source-free active domain adaptation via energy-based locality preserving transfer. In: Proceedings of the 30th ACM international conference on multimedia, pp. 5802\u20135810 (2022)","DOI":"10.1145\/3503161.3548152"},{"key":"3952_CR21","doi-asserted-by":"crossref","unstructured":"Liang, J., He, R., Tan, T.: A comprehensive survey on test-time adaptation under distribution shifts. Int. J. Comput. Vis. 1\u201334 (2024)","DOI":"10.1007\/s11263-024-02181-w"},{"key":"3952_CR22","first-page":"96","volume-title":"European Conference on Computer Vision","author":"H Lin","year":"2024","unstructured":"Lin, H., Zhang, Y., Niu, S., Cui, S., Li, Z.: Monotta: Fully test-time adaptation for monocular 3d object detection. In: European Conference on Computer Vision, pp. 96\u2013114. Springer, Cham (2024)"},{"issue":"3","key":"3952_CR23","doi-asserted-by":"publisher","first-page":"2248","DOI":"10.1109\/LRA.2024.3355752","volume":"9","author":"Z Liu","year":"2024","unstructured":"Liu, Z., Lu, X., Liu, W., Qi, W., Su, H.: Human-robot collaboration through a multi-scale graph convolution neural network with temporal attention. IEEE Robot. Automat. Lett. 9(3), 2248\u20132255 (2024)","journal-title":"IEEE Robot. Automat. Lett."},{"key":"3952_CR24","doi-asserted-by":"crossref","unstructured":"Mirza, M.J., Shin, I., Lin, W., Schriebl, A., Sun, K., Choe, J., Kozinski, M., Possegger, H., Kweon, I.S., Yoon, K.J., et\u00a0al.: Mate: Masked autoencoders are online 3d test-time learners. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16709\u201316718 (2023)","DOI":"10.1109\/ICCV51070.2023.01532"},{"key":"3952_CR25","unstructured":"Niu, S., Wu, J., Zhang, Y., Chen, Y., Zheng, S., Zhao, P., Tan, M.: Efficient test-time model adaptation without forgetting. In: International conference on machine learning, pp. 16888\u201316905. PMLR (2022)"},{"key":"3952_CR26","unstructured":"Niu, S., Wu, J., Zhang, Y., Wen, Z., Chen, Y., Zhao, P., Tan, M.: Towards stable test-time adaptation in dynamic wild world. arXiv preprint arXiv:2302.12400 (2023)"},{"key":"3952_CR27","first-page":"567","volume-title":"European Conference on Computer Vision","author":"C Saltori","year":"2022","unstructured":"Saltori, C., Krivosheev, E., Lathuili\u00e9re, S., Sebe, N., Galasso, F., Fiameni, G., Ricci, E., Poiesi, F.: Gipso: Geometrically informed propagation for online adaptation in 3d lidar segmentation. In: European Conference on Computer Vision, pp. 567\u2013585. Springer, Cham (2022)"},{"key":"3952_CR28","doi-asserted-by":"crossref","unstructured":"Shen, M., Ma, A.J., Yuen, P.C.: E 2: Entropy discrimination and energy optimization for source-free universal domain adaptation. In: 2023 IEEE International Conference on Multimedia and Expo (ICME), pp. 2705\u20132710. IEEE (2023)","DOI":"10.1109\/ICME55011.2023.00460"},{"key":"3952_CR29","doi-asserted-by":"crossref","unstructured":"Shin, I., Tsai, Y.H., Zhuang, B., Schulter, S., Liu, B., Garg, S., Kweon, I.S., Yoon, K.J.: Mm-tta: multi-modal test-time adaptation for 3d semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16928\u201316937 (2022)","DOI":"10.1109\/CVPR52688.2022.01642"},{"key":"3952_CR30","doi-asserted-by":"crossref","unstructured":"Suglia, A., Greco, C., Baker, K., Part, J.L., Papaioannou, I., Eshghi, A., Konstas, I., Lemon, O.: Alanavlm: A multimodal embodied ai foundation model for egocentric video understanding. arXiv preprint arXiv:2406.13807 (2024)","DOI":"10.18653\/v1\/2024.findings-emnlp.649"},{"key":"3952_CR31","unstructured":"Wang, D., Shelhamer, E., Liu, S., Olshausen, B., Darrell, T.: Tent: Fully test-time adaptation by entropy minimization. arXiv preprint arXiv:2006.10726 (2020)"},{"key":"3952_CR32","doi-asserted-by":"crossref","unstructured":"Wang, R., Wang, W., Gao, J., Lin, D., Yap, K.H., Li, B.: Multifuser: Multimodal fusion transformer for enhanced driver action recognition. arXiv preprint arXiv:2408.01766 (2024)","DOI":"10.1109\/MMSP61759.2024.10743612"},{"key":"3952_CR33","doi-asserted-by":"crossref","unstructured":"Wang, T., Mao, X., Zhu, C., Xu, R., Lyu, R., Li, P., Chen, X., Zhang, W., Chen, K., Xue, T., et\u00a0al.: Embodiedscan: A holistic multi-modal 3d perception suite towards embodied ai. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19757\u201319767 (2024)","DOI":"10.1109\/CVPR52733.2024.01868"},{"key":"3952_CR34","doi-asserted-by":"crossref","unstructured":"Wang, W., Hager, G.D.: Domain adaptation of visual policies with a single demonstration. In: 2024 IEEE International Conference on Robotics and Automation (ICRA), pp. 17208\u201317215. IEEE (2024)","DOI":"10.1109\/ICRA57147.2024.10610569"},{"key":"3952_CR35","first-page":"1","volume":"41","author":"H Xiong","year":"2024","unstructured":"Xiong, H., Xiang, Y.: Robust gradient aware and reliable entropy minimization for stable test-time adaptation in dynamic scenarios. Vis. Comput. 41, 1\u201316 (2024)","journal-title":"Vis. Comput."},{"key":"3952_CR36","doi-asserted-by":"crossref","unstructured":"Yang, M., Huang, Z., Hu, P., Li, T., Lv, J., Peng, X.: Learning with twin noisy labels for visible-infrared person re-identification. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 14308\u201314317 (2022)","DOI":"10.1109\/CVPR52688.2022.01391"},{"issue":"1","key":"3952_CR37","doi-asserted-by":"publisher","first-page":"1055","DOI":"10.1109\/TPAMI.2022.3155499","volume":"45","author":"M Yang","year":"2022","unstructured":"Yang, M., Li, Y., Hu, P., Bai, J., Lv, J., Peng, X.: Robust multi-view clustering with incomplete information. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 1055\u20131069 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3952_CR38","unstructured":"Yang, M., Li, Y., Zhang, C., Hu, P., Peng, X.: Test-time adaptation against multi-modal reliability bias. In: The Twelfth International Conference on Learning Representations (2024)"},{"key":"3952_CR39","doi-asserted-by":"crossref","unstructured":"Yuan, L., Xie, B., Li, S.: Robust test-time adaptation in dynamic scenarios. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15922\u201315932 (2023)","DOI":"10.1109\/CVPR52729.2023.01528"},{"key":"3952_CR40","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Xu, B., Hou, L., Sun, F., Shen, H., Cheng, X.: Tea: Test-time energy adaptation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23901\u201323911 (2024)","DOI":"10.1109\/CVPR52733.2024.02256"},{"key":"3952_CR41","first-page":"493","volume-title":"European conference on computer vision","author":"R Zhang","year":"2022","unstructured":"Zhang, R., Zhang, W., Fang, R., Gao, P., Li, K., Dai, J., Qiao, Y., Li, H.: Tip-adapter: Training-free adaption of clip for few-shot classification. In: European conference on computer vision, pp. 493\u2013510. Springer, Cham (2022)"},{"key":"3952_CR42","first-page":"19","volume-title":"European Conference on Computer Vision","author":"T Zou","year":"2024","unstructured":"Zou, T., Qu, S., Li, Z., Knoll, A., He, L., Chen, G., Jiang, C.: Hgl: Hierarchical geometry learning for test-time adaptation in 3d point cloud segmentation. In: European Conference on Computer Vision, pp. 19\u201336. Springer, Cham (2024)"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-03952-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-025-03952-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-03952-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T11:04:04Z","timestamp":1751886244000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-025-03952-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,26]]},"references-count":42,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["3952"],"URL":"https:\/\/doi.org\/10.1007\/s00371-025-03952-3","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,26]]},"assertion":[{"value":"21 April 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 May 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}