{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:14:48Z","timestamp":1777655688609,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755422","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:47:18Z","timestamp":1761374838000},"page":"4408-4417","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["UniAD: Integrating Geometric and Semantic Cues for Unified Anomaly Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9153-1928","authenticated-orcid":false,"given":"Xiaodong","family":"Wang","sequence":"first","affiliation":[{"name":"Xiamen University of Technology, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6697-8073","authenticated-orcid":false,"given":"Hongmin","family":"Hu","sequence":"additional","affiliation":[{"name":"Xiamen University of Technology, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3470-2121","authenticated-orcid":false,"given":"Fei","family":"Yan","sequence":"additional","affiliation":[{"name":"Xiamen University of Technology, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7098-2789","authenticated-orcid":false,"given":"Junwen","family":"Lu","sequence":"additional","affiliation":[{"name":"Xiamen University of Technology, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9716-1182","authenticated-orcid":false,"given":"Zhiqiang","family":"Zeng","sequence":"additional","affiliation":[{"name":"Xiamen University of Technology, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0886-654X","authenticated-orcid":false,"given":"Weidong","family":"Hong","sequence":"additional","affiliation":[{"name":"Xiamen Truesight Technology Co., Ltd, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2434-9050","authenticated-orcid":false,"given":"Zhedong","family":"Zheng","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"622","volume-title":"Perth","author":"Akcay Samet","year":"2019","unstructured":"Samet Akcay, Amir Atapour-Abarghouei, and Toby P Breckon. 2019. Ganomaly: Semi-supervised anomaly detection via adversarial training. In Computer Vision-ACCV 2018: 14th Asian Conference on Computer Vision, Perth, Australia, December 2-6, 2018, Revised Selected Papers, Part III 14. Springer, 622-637."},{"key":"e_1_3_2_1_2_1","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang Xiaodong Deng Yang Fan Wenbin Ge Yu Han Fei Huang Binyuan Hui Luo Ji Mei Li Junyang Lin Runji Lin Dayiheng Liu Gao Liu Chengqiang Lu Keming Lu Jianxin Ma Rui Men Xingzhang Ren Xuancheng Ren Chuanqi Tan Sinan Tan Jianhong Tu Peng Wang Shijie Wang Wei Wang Shengguang Wu Benfeng Xu Jin Xu An Yang Hao Yang Jian Yang Shusheng Yang Yang Yao Bowen Yu Hongyi Yuan Zheng Yuan Jianwei Zhang Xingxuan Zhang Yichang Zhang Zhenru Zhang Chang Zhou Jingren Zhou Xiaohuan Zhou and Tianhang Zhu. 2023. Qwen Technical Report. arXiv preprint arXiv:2309.16609 (2023)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00020"},{"key":"e_1_3_2_1_4_1","first-page":"161","volume-title":"Stroke and Traumatic Brain Injuries: 4th International Workshop, BrainLes 2018, Held in Conjunction with MICCAI 2018","author":"Baur Christoph","year":"2019","unstructured":"Christoph Baur, Benedikt Wiestler, Shadi Albarqouni, and Nassir Navab. 2019. Deep autoencoding models for unsupervised anomaly segmentation in brain MR images. In Brainlesion: Glioma, Multiple Sclerosis, Stroke and Traumatic Brain Injuries: 4th International Workshop, BrainLes 2018, Held in Conjunction with MICCAI 2018, Granada, Spain, September 16, 2018, Revised Selected Papers, Part I 4. Springer, 161-169."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01578-9"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00982"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00424"},{"key":"e_1_3_2_1_8_1","volume-title":"Improving unsupervised defect segmentation by applying structural similarity to autoencoders. arXiv preprint arXiv:1807.02011","author":"Bergmann Paul","year":"2018","unstructured":"Paul Bergmann, Sindy L\u00f6we, Michael Fauser, David Sattlegger, and Carsten Steger. 2018. Improving unsupervised defect segmentation by applying structural similarity to autoencoders. arXiv preprint arXiv:1807.02011 (2018)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00951"},{"key":"e_1_3_2_1_11_1","volume-title":"Int. Conf. knowledge discovery and data mining","volume":"240","author":"Ester Martin","year":"1996","unstructured":"Martin Ester, Hans-Peter Kriegel, J\u00f6rg Sander, and Xiaowei Xu. 1996. Density-based spatial clustering of applications with noise. In Int. Conf. knowledge discovery and data mining, Vol. 240."},{"key":"e_1_3_2_1_12_1","volume-title":"Cmkd: Cnn\/transformer-based cross-model knowledge distillation for audio classification. arXiv preprint arXiv:2203.06760","author":"Gong Yuan","year":"2022","unstructured":"Yuan Gong, Sameer Khurana, Andrew Rouditchenko, and James Glass. 2022. Cmkd: Cnn\/transformer-based cross-model knowledge distillation for audio classification. arXiv preprint arXiv:2203.06760 (2022)."},{"key":"e_1_3_2_1_13_1","volume-title":"Generative adversarial nets. Advances in neural information processing systems","author":"Goodfellow Ian J","year":"2014","unstructured":"Ian J Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative adversarial nets. Advances in neural information processing systems, Vol. 27 (2014)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680685"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28690"},{"key":"e_1_3_2_1_16_1","volume-title":"Deep Residual Learning for Image Recognition. arXiv preprint arXiv:1512.03385","author":"He Kaiming","year":"2015","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep Residual Learning for Image Recognition. arXiv preprint arXiv:1512.03385 (2015)."},{"key":"e_1_3_2_1_17_1","volume-title":"CSAD: Unsupervised Component Segmentation for Logical Anomaly Detection. arXiv:2408.15628 [cs.CV] https:\/\/arxiv.org\/abs\/2408.15628","author":"Hsieh Yu-Hsuan","year":"2024","unstructured":"Yu-Hsuan Hsieh and Shang-Hong Lai. 2024. CSAD: Unsupervised Component Segmentation for Logical Anomaly Detection. arXiv:2408.15628 [cs.CV] https:\/\/arxiv.org\/abs\/2408.15628"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01081"},{"key":"e_1_3_2_1_19_1","volume-title":"LogicAD: Explainable Anomaly Detection via VLM-based Text Feature Extraction. arXiv preprint arXiv:2501.01767","author":"Jin Er","year":"2025","unstructured":"Er Jin, Qihui Feng, Yongli Mou, Stefan Decker, Gerhard Lakemeyer, Oliver Simons, and Johannes Stegmaier. 2025. LogicAD: Explainable Anomaly Detection via VLM-based Text Feature Extraction. arXiv preprint arXiv:2501.01767 (2025)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28703"},{"key":"e_1_3_2_1_21_1","volume-title":"ICML","volume":"1","author":"Lafferty John","year":"2001","unstructured":"John Lafferty, Andrew McCallum, Fernando Pereira, et al., 2001. Conditional random fields: Probabilistic models for segmenting and labeling sequence data. In ICML, Vol. 1. Williamstown, MA, 3."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02504"},{"key":"e_1_3_2_1_23_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023a. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730-19742."},{"key":"e_1_3_2_1_24_1","volume-title":"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In ICML.","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022a. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In ICML."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548232"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00113"},{"key":"e_1_3_2_1_27_1","volume-title":"Myriad: Large multimodal model by applying vision experts for industrial anomaly detection. arXiv preprint arXiv:2310.19070","author":"Li Yuanze","year":"2023","unstructured":"Yuanze Li, Haolin Wang, Shihao Yuan, Ming Liu, Debin Zhao, Yiwen Guo, Chen Xu, Guangming Shi, and Wangmeng Zuo. 2023b. Myriad: Large multimodal model by applying vision experts for industrial anomaly detection. arXiv preprint arXiv:2310.19070 (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"Fair: Frequency-aware image restoration for industrial visual anomaly detection. arXiv preprint arXiv:2309.07068","author":"Liu Tongkun","year":"2023","unstructured":"Tongkun Liu, Bing Li, Xiao Du, Bingke Jiang, Leqi Geng, Feiyang Wang, and Zhuo Zhao. 2023a. Fair: Frequency-aware image restoration for industrial visual anomaly detection. arXiv preprint arXiv:2309.07068 (2023)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2023.102161"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681669"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01954"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TIM.2023.3268658","article-title":"Normal reference attention and defective feature perception network for surface defect detection","volume":"72","author":"Luo Wei","year":"2023","unstructured":"Wei Luo, Haiming Yao, and Wenyong Yu. 2023. Normal reference attention and defective feature perception network for surface defect detection. IEEE Transactions on Instrumentation and Measurement, Vol. 72 (2023), 1-14.","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"key":"e_1_3_2_1_33_1","volume-title":"MoEAD: A Parameter-Efficient Model for Multi-class Anomaly Detection. In European Conference on Computer Vision. Springer, 345-361","author":"Meng Shiyuan","year":"2024","unstructured":"Shiyuan Meng, Wenchao Meng, Qihang Zhou, Shizhong Li, Weiye Hou, and Shibo He. 2024. MoEAD: A Parameter-Efficient Model for Multi-class Anomaly Detection. In European Conference on Computer Vision. Springer, 345-361."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2025.113176"},{"key":"e_1_3_2_1_35_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01392"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680771"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58520-4_29"},{"key":"e_1_3_2_1_39_1","volume-title":"Student-teacher feature pyramid matching for anomaly detection. arXiv preprint arXiv:2103.04257","author":"Wang Guodong","year":"2021","unstructured":"Guodong Wang, Shumin Han, Errui Ding, and Di Huang. 2021. Student-teacher feature pyramid matching for anomaly detection. arXiv preprint arXiv:2103.04257 (2021)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110862"},{"key":"e_1_3_2_1_41_1","volume-title":"Beyond Walking: A Large-Scale Image-Text Benchmark for Text-based Person Anomaly Search. In ICCV.","author":"Yang Shuyu","year":"2025","unstructured":"Shuyu Yang, Yaxiong Wang, Li Zhu, and Zhedong Zheng. 2025. Beyond Walking: A Large-Scale Image-Text Benchmark for Text-based Person Anomaly Search. In ICCV."},{"key":"e_1_3_2_1_42_1","volume-title":"Visual anomaly detection via dual-attention transformer and discriminative flow. arXiv preprint arXiv:2303.17882","author":"Yao Haiming","year":"2023","unstructured":"Haiming Yao, Wei Luo, and Wenyong Yu. 2023. Visual anomaly detection via dual-attention transformer and discriminative flow. arXiv preprint arXiv:2303.17882 (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"European Conference on Computer Vision. Springer, 92-108","author":"Yao Xincheng","year":"2024","unstructured":"Xincheng Yao, Ruoqi Li, Zefeng Qian, Lu Wang, and Chongyang Zhang. 2024. Hierarchical gaussian mixture normalizing flow modeling for unified anomaly detection. In European Conference on Computer Vision. Springer, 92-108."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19821-2_31"},{"key":"e_1_3_2_1_45_1","volume-title":"Long-CLIP: Unlocking the Long-Text Capability of CLIP. arXiv preprint arXiv:2403.15378","author":"Zhang Beichen","year":"2024","unstructured":"Beichen Zhang, Pan Zhang, Xiaoyi Dong, Yuhang Zang, and Jiaqi Wang. 2024b. Long-CLIP: Unlocking the Long-Text Capability of CLIP. arXiv preprint arXiv:2403.15378 (2024)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00022"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_22"},{"key":"e_1_3_2_1_48_1","volume-title":"SPot-the-Difference Self-Supervised Pre-training for Anomaly Detection and Segmentation. arXiv preprint arXiv:2207.14315","author":"Zou Yang","year":"2022","unstructured":"Yang Zou, Jongheon Jeong, Latha Pemula, Dongqing Zhang, and Onkar Dabeer. 2022. SPot-the-Difference Self-Supervised Pre-training for Anomaly Detection and Segmentation. arXiv preprint arXiv:2207.14315 (2022)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755422","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:11:44Z","timestamp":1765339904000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755422"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":48,"alternative-id":["10.1145\/3746027.3755422","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755422","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}