{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:48:17Z","timestamp":1778082497075,"version":"3.51.4"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031916717","type":"print"},{"value":"9783031916724","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91672-4_13","type":"book-chapter","created":{"date-parts":[[2025,5,20]],"date-time":"2025-05-20T15:26:07Z","timestamp":1747754767000},"page":"201-217","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["On the\u00a0Potential of\u00a0Open-Vocabulary Models for\u00a0Object Detection in\u00a0Unusual Street Scenes"],"prefix":"10.1007","author":[{"given":"Sadia","family":"Ilyas","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4825-060X","authenticated-orcid":false,"given":"Ido","family":"Freeman","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3840-0184","authenticated-orcid":false,"given":"Matthias","family":"Rottmann","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"13_CR1","doi-asserted-by":"crossref","unstructured":"Blum, H., Sarlin, P.E., Nieto, J., Siegwart, R., Cadena, C.: Fishyscapes: a benchmark for safe semantic segmentation in autonomous driving. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops (2019)","DOI":"10.1109\/ICCVW.2019.00294"},{"key":"13_CR2","unstructured":"Bommasani, R., et\u00a0al.: On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229. Springer (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"13_CR4","unstructured":"Chan, R., et al.: Segment me if you can: A benchmark for anomaly segmentation. arXiv preprint arXiv:2104.14812 (2021)"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Chan, R., Rottmann, M., Gottschalk, H.: Entropy maximization and meta classification for out-of-distribution detection in semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5128\u20135137 (2021)","DOI":"10.1109\/ICCV48922.2021.00508"},{"key":"13_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision Pattern Recognition, pp. 1290\u20131299 (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"Cheng, T., Song, L., Ge, Y., Liu, W., Wang, X., Shan, Y.: YOLO-World: Real-time open-vocabulary object detection. arXiv preprint arXiv:2401.17270 (2024)","DOI":"10.1109\/CVPR52733.2024.01599"},{"key":"13_CR8","unstructured":"Chhipa, P.C., De, K., Chippa, M.S., Saini, R., Liwicki, M.: Investigating robustness of open-vocabulary foundation object detectors under distribution shifts. arXiv preprint arXiv:2405.14874 (2024)"},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"13_CR10","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Di\u00a0Biase, G., Blum, H., Siegwart, R., Cadena, C.: Pixel-wise anomaly detection in complex driving scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16918\u201316927 (2021)","DOI":"10.1109\/CVPR46437.2021.01664"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Du, H., et\u00a0al.: Uncovering what why and how: a comprehensive benchmark for causation understanding of video anomaly. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18793\u201318803 (2024)","DOI":"10.1109\/CVPR52733.2024.01778"},{"key":"13_CR13","unstructured":"Du, X., Wang, Z., Cai, M., Li, Y.: VOS: Learning what you don\u2019t know by virtual outlier synthesis. arXiv preprint arXiv:2202.01197 (2022)"},{"issue":"8","key":"13_CR14","doi-asserted-by":"publisher","first-page":"1035","DOI":"10.1007\/s10514-023-10132-6","volume":"47","author":"A Elhafsi","year":"2023","unstructured":"Elhafsi, A., Sinha, R., Agia, C., Schmerling, E., Nesnas, I.A., Pavone, M.: Semantic anomaly detection with large language models. Auton. Robot. 47(8), 1035\u20131055 (2023)","journal-title":"Auton. Robot."},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Esmaeilpour, S., Liu, B., Robertson, E., Shu, L.: Zero-shot out-of-distribution detection based on the pre-trained model clip. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a036, pp. 6568\u20136576 (2022)","DOI":"10.1609\/aaai.v36i6.20610"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Gasperini, S., Marcos-Ramiro, A., Schmidt, M., Navab, N., Busam, B., Tombari, F.: Segmenting known objects and unseen unknowns without prior knowledge. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 19321\u201319332 (2023)","DOI":"10.1109\/ICCV51070.2023.01770"},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Gu, Z., Zhu, B., Zhu, G., Chen, Y., Tang, M., Wang, J.: AnomalyGPT: detecting industrial anomalies using large vision-language models. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a038, pp. 1932\u20131940 (2024)","DOI":"10.1609\/aaai.v38i3.27963"},{"key":"13_CR18","unstructured":"Hendrycks, D., Gimpel, K.: A baseline for detecting misclassified and out-of-distribution examples in neural networks. arXiv preprint arXiv:1610.02136 (2016)"},{"key":"13_CR19","unstructured":"Hendrycks, D., Mazeika, M., Dietterich, T.: Deep anomaly detection with outlier exposure. In: Proceedings of the International Conference on Learning Representations (2019)"},{"key":"13_CR20","first-page":"677","volume":"34","author":"R Huang","year":"2021","unstructured":"Huang, R., Geng, A., Li, Y.: On the importance of gradients for detecting distributional shifts in the wild. Adv. Neural. Inf. Process. Syst. 34, 677\u2013689 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR21","unstructured":"Jang, J., Ye, S., Seo, M.: Can large language models truly understand prompts? A case study with negated prompts. In: Transfer Learning for Natural Language Processing Workshop, pp. 52\u201362. PMLR (2023)"},{"key":"13_CR22","unstructured":"Jiang, H., Fang, Z., Jiang, X., Zhong, Z., Liu, T., Han, B.: DOG: Diffusion-based outlier generation for out-of-distribution detection (2024). https:\/\/openreview.net\/forum?id=Go8hf9wKJx"},{"key":"13_CR23","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.: Ultralytics YOLO (2023). https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"13_CR24","doi-asserted-by":"crossref","unstructured":"Kamath, A., Singh, M., LeCun, Y., Synnaeve, G., Misra, I., Carion, N.: Mdetr-modulated detection for end-to-end multi-modal understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1780\u20131790 (2021)","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"13_CR25","unstructured":"Lakshminarayanan, B., Pritzel, A., Blundell, C.: Simple and scalable predictive uncertainty estimation using deep ensembles. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"13_CR26","unstructured":"Lee, K., Lee, K., Lee, H., Shin, J.: A simple unified framework for detecting out-of-distribution samples and adversarial attacks. Adv. Neural Inf. Process. Syst. 31 (2018)"},{"key":"13_CR27","unstructured":"Li, A., et al.: Anomaly detection of tabular data using LLMs. arXiv preprint arXiv:2406.16308 (2024)"},{"issue":"9","key":"13_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3555803","volume":"55","author":"B Li","year":"2023","unstructured":"Li, B., et al.: Trustworthy AI: from principles to practices. ACM Comput. Surv. 55(9), 1\u201346 (2023)","journal-title":"ACM Comput. Surv."},{"key":"13_CR29","unstructured":"Liang, S., Li, Y., Srikant, R.: Enhancing the reliability of out-of-distribution image detection in neural networks. arXiv preprint arXiv:1706.02690 (2017)"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Lis, K., Nakka, K., Fua, P., Salzmann, M.: Detecting the unexpected via image Resynthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2152\u20132161 (2019)","DOI":"10.1109\/ICCV.2019.00224"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Liu, S., et\u00a0al.: Grounding DINO: Marrying DINO with grounded pre-training for open-set object detection. arXiv preprint arXiv:2303.05499 (2023)","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"13_CR32","first-page":"21464","volume":"33","author":"W Liu","year":"2020","unstructured":"Liu, W., Wang, X., Owens, J., Li, Y.: Energy-based out-of-distribution detection. Adv. Neural. Inf. Process. Syst. 33, 21464\u201321475 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR33","unstructured":"Liu, Y., et al.: RoBERTa: A robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"13_CR34","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"13_CR35","unstructured":"Mukhoti, J., Gal, Y.: Evaluating bayesian deep learning methods for semantic segmentation. arXiv preprint arXiv:1811.12709 (2018)"},{"key":"13_CR36","doi-asserted-by":"crossref","unstructured":"Nekrasov, A., Hermans, A., Kuhnert, L., Leibe, B.: UGainS: uncertainty guided anomaly instance segmentation. In: DAGM German Conference on Pattern Recognition, pp. 50\u201366. Springer (2023)","DOI":"10.1007\/978-3-031-54605-1_4"},{"key":"13_CR37","unstructured":"Nekrasov, A., Zhou, R., Ackermann, M., Hermans, A., Leibe, B., Rottmann, M.: OoDIS: Anomaly instance segmentation benchmark. arXiv preprint arXiv:2406.11835 (2024)"},{"key":"13_CR38","first-page":"21371","volume":"35","author":"P Oberdiek","year":"2022","unstructured":"Oberdiek, P., Fink, G., Rottmann, M.: UQGAN: a unified model for uncertainty quantification of deep classifiers trained via conditional GANs. Adv. Neural. Inf. Process. Syst. 35, 21371\u201321385 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR39","doi-asserted-by":"crossref","unstructured":"Pinggera, P., Ramos, S., Gehrig, S., Franke, U., Rother, C., Mester, R.: Lost and found: detecting small road hazards for self-driving vehicles. In: 2016 IEEE. In: RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 1099\u20131106","DOI":"10.1109\/IROS.2016.7759186"},{"key":"13_CR40","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"13_CR41","doi-asserted-by":"crossref","unstructured":"Rai, S.N., Cermelli, F., Fontanel, D., Masone, C., Caputo, B.: Unmasking anomalies in road-scene segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4037\u20134046 (2023)","DOI":"10.1109\/ICCV51070.2023.00373"},{"key":"13_CR42","unstructured":"Sun, H., He, R., Han, Z., Lin, Z., Gong, Y., Yin, Y.: Clip-driven outliers synthesis for few-shot OOD detection. arXiv preprint arXiv:2404.00323 (2024)"},{"key":"13_CR43","doi-asserted-by":"crossref","unstructured":"Wang, H., Li, Y., Yao, H., Li, X.: CLIPN for zero-shot OOD detection: teaching clip to say no. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1802\u20131812 (2023)","DOI":"10.1109\/ICCV51070.2023.00173"},{"key":"13_CR44","doi-asserted-by":"crossref","unstructured":"Wilson, S., Fischer, T., Dayoub, F., Miller, D., S\u00fcnderhauf, N.: SAFE: sensitivity-aware features for out-of-distribution object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 23565\u201323576 (2023)","DOI":"10.1109\/ICCV51070.2023.02154"},{"key":"13_CR45","unstructured":"Xu, X., Cao, Y., Chen, Y., Shen, W., Huang, X.: Customizing visual-language foundation models for multi-modal anomaly detection and reasoning. arXiv preprint arXiv:2403.11083 (2024)"},{"key":"13_CR46","doi-asserted-by":"crossref","unstructured":"Yu, F., et al.: BDD100K: a diverse driving dataset for heterogeneous multitask learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2636\u20132645 (2020)","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"13_CR47","unstructured":"Zhang, H., et al.: DINO: DETR with improved denoising anchor boxes for end-to-end object detection. arXiv preprint arXiv:2203.03605 (2022)"},{"key":"13_CR48","unstructured":"Zhao, T., Liu, P., He, X., Zhang, L., Lee, K.: Real-time transformer-based open-vocabulary detection with efficient fusion head. arXiv preprint arXiv:2403.06892 (2024)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91672-4_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,20]],"date-time":"2025-05-20T15:26:29Z","timestamp":1747754789000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91672-4_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031916717","9783031916724"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91672-4_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}