{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T17:14:14Z","timestamp":1743009254186,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":40,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819784899"},{"type":"electronic","value":"9789819784905"}],"license":[{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8490-5_4","type":"book-chapter","created":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T09:09:07Z","timestamp":1730884147000},"page":"46-60","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing Zero-Shot Anomaly Detection: CLIP-SAM Collaboration with\u00a0Cascaded Prompts"],"prefix":"10.1007","author":[{"given":"Yanning","family":"Hou","sequence":"first","affiliation":[]},{"given":"Ke","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Junfa","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yanran","family":"Ruan","sequence":"additional","affiliation":[]},{"given":"Jianfeng","family":"Qiu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,7]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Aota, T., Tong, L.T.T., Okatani, T.: Zero-shot versus many-shot: unsupervised texture anomaly detection. In: IEEE\/CVF Winter Conference on Applications of Computer Vision, WACV, pp. 5553\u20135561. IEEE (2023)","DOI":"10.1109\/WACV56688.2023.00552"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Bergmann, P., Fauser, M., Sattlegger, D., Steger, C.: MVTec AD\u2014A comprehensive real-world dataset for unsupervised anomaly detection. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00982"},{"key":"4_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.108846","volume":"248","author":"Y Cao","year":"2022","unstructured":"Cao, Y., Wan, Q., Shen, W., Gao, L.: Informative knowledge distillation for image anomaly segmentation. Knowl. Based Syst. 248, 108846 (2022)","journal-title":"Knowl. Based Syst."},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Cao, Y., Xu, X., Shen, W.: Complementary pseudo multimodal feature for point cloud anomaly detection. CoRR abs\/2303.13194 (2023)","DOI":"10.1016\/j.patcog.2024.110761"},{"key":"4_CR5","unstructured":"Cao, Y., et al.: Segment any anomaly without training via hybrid prompt regularization. CoRR. arXiv:2305.10724 (2023)"},{"key":"4_CR6","unstructured":"Chen, X., Han, Y., Zhang, J.: A zero-\/few-shot anomaly classification and segmentation method for CVPR 2023 VAND workshop challenge tracks 1 &2: 1st place on zero-shot AD and 4th place on few-shot AD. CoRR. arXiv:2305.17382 (2023)"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: CLIP-AD: a language-guided staged dual-path model for zero-shot anomaly detection. CoRR. arXiv:2311.00453 (2023)","DOI":"10.1007\/978-981-97-9003-6_2"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Deng, H., Li, X.: Anomaly detection via reverse distillation from one-class embedding. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, pp. 9727\u20139736 (2022)","DOI":"10.1109\/CVPR52688.2022.00951"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Deng, H., Li, X.: Anomaly detection via reverse distillation from one-class embedding. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, pp. 9727\u20139736 (2022)","DOI":"10.1109\/CVPR52688.2022.00951"},{"key":"4_CR10","unstructured":"Deng, H., Zhang, Z., Bao, J., Li, X.: AnoVL: adapting vision-language models for unified zero-shot anomaly localization. CoRR. arXiv:2308.15939 (2023)"},{"key":"4_CR11","unstructured":"Dosovitskiy, A., et al.: An image is worth 16 $$\\times $$ 16 words: transformers for image recognition at scale. In: 9th International Conference on Learning Representations, ICLR (2021)"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Gong, D., et al.: Memorizing normality to detect anomaly: memory-augmented deep autoencoder for unsupervised anomaly detection. In: IEEE\/CVF International Conference on Computer Vision, ICCV, pp. 1705\u20131714. IEEE (2019)","DOI":"10.1109\/ICCV.2019.00179"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR, pp. 770\u2013778. IEEE Computer Society (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Jeong, J., Zou, Y., Kim, T., Zhang, D., Ravichandran, A., Dabeer, O.: Winclip: Zero-\/few-shot anomaly classification and segmentation. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 19606\u201319616 (2023)","DOI":"10.1109\/CVPR52729.2023.01878"},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Khattak, M.U., Wasim, S.T., Naseer, M., Khan, S., Yang, M., Khan, F.S.: Self-regulating prompts: foundational model adaptation without forgetting. In: IEEE\/CVF International Conference on Computer Vision, ICCV, pp. 15144\u201315154 (2023)","DOI":"10.1109\/ICCV51070.2023.01394"},{"key":"4_CR16","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.C.H.: BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: Krause, A., Brunskill, E., Cho, K., Engelhardt, B., Sabato, S., Scarlett, J. (eds.) International Conference on Machine Learning, ICML. Proceedings of Machine Learning Research, vol.\u00a0202, pp. 19730\u201319742. PMLR (2023)"},{"key":"4_CR17","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.C.H.: BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: Chaudhuri, K., Jegelka, S., Song, L., Szepesv\u00e1ri, C., Niu, G., Sabato, S. (eds.) International Conference on Machine Learning, ICML. Proceedings of Machine Learning Research, vol.\u00a0162, pp. 12888\u201312900. PMLR (2022)"},{"key":"4_CR18","unstructured":"Li, S., Cao, J., Ye, P., Ding, Y., Tu, C., Chen, T.: ClipSAM: CLIP and SAM collaboration for zero-shot anomaly segmentation. CoRR. arXiv:2401.12665 (2024)"},{"key":"4_CR19","unstructured":"Lin, X., Xiang, Y., Zhang, L., Yang, X., Yan, Z., Yu, L.: SAMUS: adapting segment anything model for clinically-friendly and generalizable ultrasound image segmentation. CoRR. arXiv:2309.06824 (2023)"},{"key":"4_CR20","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. In: Oh, A., Naumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Annual Conference on Neural Information Processing Systems 2023, NeurIPS (2023)"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Liu, S., et al.: Grounding DINO: marrying DINO with grounded pre-training for open-set object detection. CoRR (2023)","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"4_CR22","unstructured":"Lu, J., Clark, C., Zellers, R., Mottaghi, R., Kembhavi, A.: UNIFIED-IO: a unified model for vision, language, and multi-modal tasks. In: The Eleventh International Conference on Learning Representations, ICLR (2023)"},{"issue":"6","key":"4_CR23","doi-asserted-by":"publisher","first-page":"2313","DOI":"10.1109\/TNNLS.2021.3130074","volume":"33","author":"FV Massoli","year":"2022","unstructured":"Massoli, F.V., Falchi, F., Kantarci, A., Akti, S., Ekenel, H.K., Amato, G.: MOCCA: multilayer one-class classification for anomaly detection. IEEE Trans. Neural Netw. Learn. Syst. 33(6), 2313\u20132323 (2022)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"4_CR24","unstructured":"Nagy, A.M.: Zero-shot learning and classification of steel surface defects. In: Osten, W., Nikolaev, D. (eds.) Fourteenth International Conference on Machine Vision, ICMV (2021)"},{"key":"4_CR25","unstructured":"Oquab, M., et al.: Dinov2: learning robust visual features without supervision. CoRR. arXiv:2304.07193 (2023)"},{"key":"4_CR26","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Proceedings of the 38th International Conference on Machine Learning, (ICML), pp. 8748\u20138763 (2021)"},{"key":"4_CR27","doi-asserted-by":"crossref","unstructured":"Rao, Y., et al.: DenseCLIP: language-guided dense prediction with context-aware prompting. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, pp. 18061\u201318070 (2022)","DOI":"10.1109\/CVPR52688.2022.01755"},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"Ristea, N., et al.: Self-supervised predictive convolutional attentive block for anomaly detection. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, pp. 13566\u201313576 (2022)","DOI":"10.1109\/CVPR52688.2022.01321"},{"key":"4_CR29","doi-asserted-by":"crossref","unstructured":"Rivera, A.R., Khan, A., Bekkouch, I.E.I., Sheikh, T.S.: Anomaly detection based on zero-shot outlier synthesis and hierarchical feature distillation. IEEE Trans. Neural Netw. Learn. Syst. 281\u2013291 (2022)","DOI":"10.1109\/TNNLS.2020.3027667"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Roth, K., Pemula, L., Zepeda, J., Sch\u00f6lkopf, B., Brox, T., Gehler, P.V.: Towards total recall in industrial anomaly detection. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, pp. 14298\u201314308. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.01392"},{"key":"4_CR31","unstructured":"Sohn, K., Li, C., Yoon, J., Jin, M., Pfister, T.: Learning and evaluating representations for deep one-class classification. In: International Conference on Learning Representations, ICLR (2021)"},{"issue":"6","key":"4_CR32","doi-asserted-by":"publisher","first-page":"6182","DOI":"10.1109\/TIE.2021.3094452","volume":"69","author":"Q Wan","year":"2022","unstructured":"Wan, Q., Gao, L., Li, X., Wen, L.: Industrial image anomaly localization based on Gaussian clustering of pretrained feature. IEEE Trans. Ind. Electron. 69(6), 6182\u20136192 (2022)","journal-title":"IEEE Trans. Ind. Electron."},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Yi, J., Yoon, S.: Patch SVDD: patch-level SVDD for anomaly detection and segmentation. In: Ishikawa, H., Liu, C., Pajdla, T., Shi, J. (eds.) 15th Asian Conference on Computer Vision ACCV. Lecture Notes in Computer Science, vol. 12627, pp. 375\u2013390 (2020)","DOI":"10.1007\/978-3-030-69544-6_23"},{"key":"4_CR34","unstructured":"Zhang, H., et al.: DINO: DETR with improved denoising anchor boxes for end-to-end object detection. In: The Eleventh International Conference on Learning Representations, ICLR (2023)"},{"key":"4_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, K., Liu, D.: Customized segment anything model for medical image segmentation. CoRR. arXiv:2304.13785 (2023)","DOI":"10.2139\/ssrn.4495221"},{"key":"4_CR36","unstructured":"Zhang, R., et al.: Personalize segment anything model with one shot. CoRR. arXiv:2305.03048 (2023)"},{"key":"4_CR37","doi-asserted-by":"crossref","unstructured":"Zhong, Y., et al.: RegionCLIP: region-based language-image pretraining. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, pp. 16772\u201316782 (2022)","DOI":"10.1109\/CVPR52688.2022.01629"},{"key":"4_CR38","unstructured":"Zhou, Q., Pang, G., Tian, Y., He, S., Chen, J.: AnomalyCLIP: object-agnostic prompt learning for zero-shot anomaly detection. In: The Twelfth International Conference on Learning Representations (ICLR), pp. 1\u201333 (2024)"},{"key":"4_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2024.3485395","volume":"73","author":"J Zhu","year":"2024","unstructured":"Zhu, J., Yan, P., Jiang, J., Cui, Y., Xu, X.: Asymmetric teacher-student feature pyramid matching for industrial anomaly detection. IEEE Trans. Instrum. Meas. 73, 1\u201313 (2024)","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"4_CR40","doi-asserted-by":"crossref","unstructured":"Zou, Y., Jeong, J., Pemula, L., Zhang, D., Dabeer, O.: Spot-the-difference self-supervised pre-training for anomaly detection and segmentation. In: Avidan, S., Brostow, G.J., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision - ECCV 2022 - 17th European Conference, Tel Aviv., vol. 13690, pp. 392\u2013408 (2022)","DOI":"10.1007\/978-3-031-20056-4_23"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8490-5_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T21:34:04Z","timestamp":1733002444000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8490-5_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,7]]},"ISBN":["9789819784899","9789819784905"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8490-5_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,7]]},"assertion":[{"value":"7 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}