{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:53:43Z","timestamp":1781538823955,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810880","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"884-892","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Where &amp; What Anomaly? A Framework for Pose-Agnostic Anomaly Detection and Zero-Shot Semantic Classification"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-7992-5390","authenticated-orcid":false,"given":"Zhaokun","family":"Huang","sequence":"first","affiliation":[{"name":"Xi\u2019an Jiaotong University, Xi'an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5978-3781","authenticated-orcid":false,"given":"Yonghong","family":"Song","sequence":"additional","affiliation":[{"name":"Xi\u2019an Jiaotong University, Xi'an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7606-3493","authenticated-orcid":false,"given":"Heyao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xi\u2019an Jiaotong University, Xi'an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Jinze Bai Shuai Bai Shusheng Yang Shijie Wang Sinan Tan Peng Wang Junyang Lin Chang Zhou and Jingren Zhou. 2023. Qwen-VL: A Versatile Vision-Language Model for Understanding Localization Text Reading and Beyond. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.12966 (2023)."},{"key":"e_1_3_3_1_3_2","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang Humen Zhong Yuanzhi Zhu Mingkun Yang Zhaohai Li Jianqiang Wan Pengfei Wang Wei Ding Zheren Fu Yiheng Xu Jiabo Ye Xi Zhang Tianbao Xie Zesen Cheng Hang Zhang Zhibo Yang Haiyang Xu and Junyang Lin. 2025. Qwen2.5-VL Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.13923 (2025)."},{"key":"e_1_3_3_1_4_2","first-page":"9592","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"Bergmann Paul","year":"2019","unstructured":"Paul Bergmann, Michael Fauser, David Sattlegger, and Carsten Steger. 2019. MVTec AD\u2013A comprehensive real-world dataset for unsupervised anomaly detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 9592\u20139600."},{"key":"e_1_3_3_1_5_2","unstructured":"Bin-Bin Gao Yue Zhou Jiangtao Yan Yuezhi Cai Weixi Zhang Meng Wang Jun Liu Yong Liu Lei Wang and Chengjie Wang. 2025. AdaptCLIP: Adapting CLIP for Universal Visual Anomaly Detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.09926 (2025)."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680685"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.27963"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28690"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01878"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681619"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Bernhard Kerbl Georgios Kopanas Thomas Leimk\u00fchler and George Drettakis. 2023. 3D Gaussian splatting for real-time radiance field rendering. (2023).","DOI":"10.1145\/3592433"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00399"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01594"},{"key":"e_1_3_3_1_15_2","unstructured":"Haotian Liu Chunyuan Li Yuheng Li and Yong\u00a0Jae Lee. 2023. Improved Baselines with Visual Instruction Tuning."},{"key":"e_1_3_3_1_16_2","unstructured":"Haotian Liu Chunyuan Li Yuheng Li Bo Li Yuanhan Zhang Sheng Shen and Yong\u00a0Jae Lee. 2024. LLaVA-NeXT: Improved reasoning OCR and world knowledge. https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next\/"},{"key":"e_1_3_3_1_17_2","unstructured":"Haotian Liu Chunyuan Li Qingyang Wu and Yong\u00a0Jae Lee. 2023. Visual Instruction Tuning."},{"key":"e_1_3_3_1_18_2","first-page":"378","volume-title":"European Conference on Computer Vision","author":"Liu Yizhe","year":"2024","unstructured":"Yizhe Liu, Yan\u00a0Song Hu, Yuhao Chen, and John Zelek. 2024. SplatPose+: Real-Time Image-Based Pose-Agnostic 3D Anomaly Detection. In European Conference on Computer Vision. Springer, 378\u2013391."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00447"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Ben Mildenhall Pratul\u00a0P Srinivasan Matthew Tancik Jonathan\u00a0T Barron Ravi Ramamoorthi and Ren Ng. 2021. Nerf: Representing scenes as neural radiance fields for view synthesis. Commun. ACM 65 1 (2021) 99\u2013106.","DOI":"10.1145\/3503250"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.01908"},{"key":"e_1_3_3_1_22_2","first-page":"301","volume-title":"European Conference on Computer Vision","author":"Qu Zhen","year":"2024","unstructured":"Zhen Qu, Xian Tao, Mukesh Prasad, Fei Shen, Zhengtao Zhang, Xinyi Gong, and Guiguang Ding. 2024. Vcp-clip: A visual context prompting model for zero-shot anomaly segmentation. In European Conference on Computer Vision. Springer, 301\u2013317."},{"key":"e_1_3_3_1_23_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763."},{"key":"e_1_3_3_1_24_2","unstructured":"Qwen Team. 2025. Qwen3 Technical Report. arxiv:https:\/\/arXiv.org\/abs\/2505.09388\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2505.09388"},{"key":"e_1_3_3_1_25_2","unstructured":"Peng Wang Shuai Bai Sinan Tan Shijie Wang Zhihao Fan Jinze Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Yang Fan Kai Dang Mengfei Du Xuancheng Ren Rui Men Dayiheng Liu Chang Zhou Jingren Zhou and Junyang Lin. 2024. Qwen2-VL: Enhancing Vision-Language Model\u2019s Perception of the World at Any Resolution. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.12191 (2024)."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3731715.3733295"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01897"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"crossref","unstructured":"Lihe Yang Bingyi Kang Zilong Huang Zhen Zhao Xiaogang Xu Jiashi Feng and Hengshuang Zhao. 2024. Depth anything v2. Advances in Neural Information Processing Systems 37 (2024) 21875\u201321911.","DOI":"10.52202\/079017-0688"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3755725"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01580"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Qiang Zhou Weize Li Lihan Jiang Guoliang Wang Guyue Zhou Shanghang Zhang and Hao Zhao. 2023. Pad: A dataset and benchmark for pose-agnostic anomaly detection. Advances in Neural Information Processing Systems 36 (2023) 44558\u201344571.","DOI":"10.52202\/075280-1930"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01688"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:56:13Z","timestamp":1781535373000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810880"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":31,"alternative-id":["10.1145\/3805622.3810880","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810880","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}