{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:57:24Z","timestamp":1781539044924,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"National Natural Science Foundation of China (Regional Project)","award":["62466056"],"award-info":[{"award-number":["62466056"]}]},{"name":"Tianchi Young Talent Doctoral Program","award":["51052501824"],"award-info":[{"award-number":["51052501824"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810745","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"654-662","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["SSHF-CLIP: Semantic-Guided Scale Selection and Hybrid Fusion for Zero-Shot Anomaly Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-0914-6184","authenticated-orcid":false,"given":"Kaiyuan","family":"Jin","sequence":"first","affiliation":[{"name":"Xinjiang University, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6903-5311","authenticated-orcid":false,"given":"Wayit","family":"Abliz","sequence":"additional","affiliation":[{"name":"Xinjiang University, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9225-0554","authenticated-orcid":false,"given":"Maihemuti","family":"Maimaiti","sequence":"additional","affiliation":[{"name":"Xinjiang University, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9475-1101","authenticated-orcid":false,"given":"Zaokere","family":"Kadeer","sequence":"additional","affiliation":[{"name":"Xinjiang University, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1681-1089","authenticated-orcid":false,"given":"Aishan","family":"Wumaier","sequence":"additional","affiliation":[{"name":"Xinjiang University, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2248-4678","authenticated-orcid":false,"given":"Abdujelil","family":"Abdurahman","sequence":"additional","affiliation":[{"name":"Xinjiang University, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2934-6339","authenticated-orcid":false,"given":"Panpan","family":"Zheng","sequence":"additional","affiliation":[{"name":"Xinjiang University, Urumqi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","first-page":"622","volume-title":"Asian conference on computer vision","author":"Akcay Samet","year":"2018","unstructured":"Samet Akcay, Amir Atapour-Abarghouei, and Toby\u00a0P Breckon. 2018. Ganomaly: Semi-supervised anomaly detection via adversarial training. In Asian conference on computer vision. Springer, 622\u2013637."},{"key":"e_1_3_3_1_3_2","volume-title":"k-means++: The advantages of careful seeding","author":"Arthur David","year":"2006","unstructured":"David Arthur and Sergei Vassilvitskii. 2006. k-means++: The advantages of careful seeding. Technical Report. Stanford."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00408"},{"key":"e_1_3_3_1_5_2","first-page":"9592","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"Bergmann Paul","year":"2019","unstructured":"Paul Bergmann, Michael Fauser, David Sattlegger, and Carsten Steger. 2019. MVTec AD\u2013A comprehensive real-world dataset for unsupervised anomaly detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 9592\u20139600."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Jorge Bernal F\u00a0Javier S\u00e1nchez Gloria Fern\u00e1ndez-Esparrach Debora Gil Cristina Rodr\u00edguez and Fernando Vilari\u00f1o. 2015. WM-DOVA maps for accurate polyp highlighting in colonoscopy: Validation vs. saliency maps from physicians. Computerized medical imaging and graphics 43 (2015) 99\u2013111.","DOI":"10.1016\/j.compmedimag.2015.02.007"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-06430-2_56"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Mateusz Buda Ashirbani Saha and Maciej\u00a0A Mazurowski. 2019. Association of genomic subtypes of lower-grade gliomas with shape features automatically extracted by a deep learning algorithm. Computers in biology and medicine 109 (2019) 218\u2013225.","DOI":"10.1016\/j.compbiomed.2019.05.002"},{"key":"e_1_3_3_1_9_2","first-page":"55","volume-title":"European Conference on Computer Vision","author":"Cao Yunkang","year":"2024","unstructured":"Yunkang Cao, Jiangning Zhang, Luca Frittoli, Yuqi Cheng, Weiming Shen, and Giacomo Boracchi. 2024. Adaclip: Adapting clip with hybrid learnable prompts for zero-shot anomaly detection. In European Conference on Computer Vision. Springer, 55\u201372."},{"key":"e_1_3_3_1_10_2","unstructured":"Xuhai Chen Yue Han and Jiangning Zhang. 2023. April-gan: A zero-\/few-shot anomaly classification and segmentation method for cvpr 2023 vand workshop challenge tracks 1&2: 1st place on zero-shot ad and 4th place on few-shot ad. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.17382 (2023)."},{"key":"e_1_3_3_1_11_2","unstructured":"Xuhai Chen Yue Han and Jiangning Zhang. 2023. A zero-\/few-shot anomaly classification and segmentation method for CVPR 2023 (VAND) workshop challenge tracks 1 &2. 1st Place on Zero-shot AD and 4th Place on Few-shot AD 2305 (2023) 17382."},{"key":"e_1_3_3_1_12_2","unstructured":"N Cohen and Y Hoshen. 2005. Sub-image anomaly detection with deep pyramid correspondences. arXiv 2020. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2005.02357 (2005)."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-68799-1_35"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00951"},{"key":"e_1_3_3_1_15_2","unstructured":"Alexey Dosovitskiy. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.11929 (2020)."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3755635"},{"key":"e_1_3_3_1_17_2","unstructured":"Bin-Bin Gao Yue Zhou Jiangtao Yan Yuezhi Cai Weixi Zhang Meng Wang Jun Liu Yong Liu Lei Wang and Chengjie Wang. 2025. AdaptCLIP: Adapting CLIP for Universal Visual Anomaly Detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.09926 (2025)."},{"key":"e_1_3_3_1_18_2","unstructured":"Ian\u00a0J Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative adversarial nets. Advances in neural information processing systems 27 (2014)."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00188"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01081"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00205"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01878"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Daehyun Kim Sungyong Baik and Tae\u00a0Hyun Kim. 2023. Sanflow: Semantic-aware normalizing flow for anomaly detection. Advances in neural information processing systems 36 (2023) 75434\u201375454.","DOI":"10.52202\/075280-3297"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01954"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV61041.2025.00138"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00447"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISIE45552.2021.9576231"},{"key":"e_1_3_3_1_28_2","unstructured":"Adam Paszke Sam Gross Francisco Massa Adam Lerer James Bradbury Gregory Chanan Trevor Killeen Zeming Lin Natalia Gimelshein Luca Antiga et\u00a0al. 2019. Pytorch: An imperative style high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3083187.3083212"},{"key":"e_1_3_3_1_30_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763."},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25309"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01392"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02348"},{"key":"e_1_3_3_1_34_2","first-page":"11","volume-title":"DAGM symposium in","author":"Wieler Matthias","year":"2007","unstructured":"Matthias Wieler and Tobias Hahn. 2007. Weakly supervised learning for industrial optical inspection. In DAGM symposium in , Vol.\u00a06. 11."},{"key":"e_1_3_3_1_35_2","volume-title":"Proceedings of the Asian conference on computer vision","author":"Yi Jihun","year":"2020","unstructured":"Jihun Yi and Sungroh Yoon. 2020. Patch svdd: Patch-level svdd for anomaly detection and segmentation. In Proceedings of the Asian conference on computer vision."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData47090.2019.9005589"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412161"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"crossref","unstructured":"Vitjan Zavrtanik Matej Kristan and Danijel Sko\u010daj. 2021. Reconstruction by inpainting for visual anomaly detection. Pattern Recognition 112 (2021) 107706.","DOI":"10.1016\/j.patcog.2020.107706"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00381"},{"key":"e_1_3_3_1_40_2","volume-title":"The Eleventh International Conference on Learning Representations","author":"Zhao Yuzhong","year":"2023","unstructured":"Yuzhong Zhao, Qiaoqiao Ding, and Xiaoqun Zhang. 2023. AE-FLOW: Autoencoders with normalizing flows for medical images anomaly detection. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33011278"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33011286"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData52589.2021.9671406"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"crossref","unstructured":"Kaiyang Zhou Jingkang Yang Chen\u00a0Change Loy and Ziwei Liu. 2022. Learning to prompt for vision-language models. International Journal of Computer Vision 130 9 (2022) 2337\u20132348.","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_3_1_45_2","unstructured":"Qihang Zhou Guansong Pang Yu Tian Shibo He and Jiming Chen. 2023. Anomalyclip: Object-agnostic prompt learning for zero-shot anomaly detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.18961 (2023)."},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20056-4_23"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:38:11Z","timestamp":1781537891000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810745"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":45,"alternative-id":["10.1145\/3805622.3810745","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810745","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}