{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:29:23Z","timestamp":1772908163285,"version":"3.50.1"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031197741","type":"print"},{"value":"9783031197758","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19775-8_20","type":"book-chapter","created":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T12:12:59Z","timestamp":1666440779000},"page":"334-350","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["No Token Left Behind: Explainability-Aided Image Classification and\u00a0Generation"],"prefix":"10.1007","author":[{"given":"Roni","family":"Paiss","sequence":"first","affiliation":[]},{"given":"Hila","family":"Chefer","sequence":"additional","affiliation":[]},{"given":"Lior","family":"Wolf","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,23]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Abnar, S., Zuidema, W.: Quantifying attention flow in transformers. arXiv preprint arXiv:2005.00928 (2020)","DOI":"10.18653\/v1\/2020.acl-main.385"},{"key":"20_CR2","unstructured":"Bau, D., et al.: Paint by word. arXiv preprint arXiv:2103.10951 (2021)"},{"key":"20_CR3","unstructured":"Berg, T., Forsyth, D.: Animals on the web. In: CVPR (2006)"},{"key":"20_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1007\/978-3-319-44781-0_8","volume-title":"Artificial Neural Networks and Machine Learning \u2013 ICANN 2016","author":"A Binder","year":"2016","unstructured":"Binder, A., Montavon, G., Lapuschkin, S., M\u00fcller, K.-R., Samek, W.: Layer-wise relevance propagation for neural networks with local renormalization layers. In: Villa, A.E.P., Masulli, P., Pons Rivero, A.J. (eds.) ICANN 2016. LNCS, vol. 9887, pp. 63\u201371. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-44781-0_8"},{"key":"20_CR5","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. arXiv preprint arXiv:2005.12872 (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Chefer, H., Benaim, S., Paiss, R., Wolf, L.: Image-based clip-guided essence transfer (2021)","DOI":"10.1007\/978-3-031-19778-9_40"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Chefer, H., Gur, S., Wolf, L.: Generic attention-model explainability for interpreting bi-modal and encoder-decoder transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 397\u2013406, October 2021","DOI":"10.1109\/ICCV48922.2021.00045"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Chefer, H., Gur, S., Wolf, L.: Transformer interpretability beyond attention visualization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 782\u2013791, June 2021","DOI":"10.1109\/CVPR46437.2021.00084"},{"key":"20_CR9","doi-asserted-by":"crossref","unstructured":"Chen, X., Gupta, A.K.: Webly supervised learning of convolutional networks. In: 2015 IEEE International Conference on Computer Vision (ICCV), pp. 1431\u20131439 (2015)","DOI":"10.1109\/ICCV.2015.168"},{"key":"20_CR10","unstructured":"Crowson, K.: VQGAN+CLIP (2021). https:\/\/colab.research.google.com\/drive\/1L8oL-vLJXVcRzCFbPwOoMkPKJ8-aYdPN"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"20_CR12","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"20_CR13","doi-asserted-by":"crossref","unstructured":"Esser, P., Rombach, R., Ommer, B.: Taming transformers for high-resolution image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12873\u201312883, June 2021","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"20_CR14","doi-asserted-by":"publisher","unstructured":"Fergus, R., Fei-Fei, L., Perona, P., Zisserman, A.: Learning object categories from internet image searches. Proc. IEEE 98(8), 1453\u20131466 (2010). https:\/\/doi.org\/10.1109\/JPROC.2010.2048990","DOI":"10.1109\/JPROC.2010.2048990"},{"key":"20_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530164","volume":"41","author":"R Gal","year":"2021","unstructured":"Gal, R., Patashnik, O., Maron, H., Chechik, G., Cohen-Or, D.: StyleGAN-NADA: CLIP-guided domain adaptation of image generators. ACM Trans. Graph. 41, 1\u20133 (2021)","journal-title":"ACM Trans. Graph."},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., et al.: The many faces of robustness: a critical analysis of out-of-distribution generalization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 8340\u20138349, October 2021","DOI":"10.1109\/ICCV48922.2021.00823"},{"key":"20_CR18","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., Zhao, K., Basart, S., Steinhardt, J., Song, D.: Natural adversarial examples. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 15262\u201315271, June 2021","DOI":"10.1109\/CVPR46437.2021.01501"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., Aila, T.: Analyzing and improving the image quality of StyleGAN. In: CVPR, pp. 8110\u20138119 (2020)","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"20_CR20","unstructured":"Kim, G., Ye, J.C.: DiffusionCLIP: text-guided image manipulation using diffusion models (2021)"},{"key":"20_CR21","doi-asserted-by":"crossref","unstructured":"Li, A., Jabri, A., Joulin, A., van der Maaten, L.: Learning visual n-grams from web data. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), October 2017","DOI":"10.1109\/ICCV.2017.449"},{"key":"20_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"20_CR23","unstructured":"Liu, X., Gong, C., Wu, L., Zhang, S., Su, H., Liu, Q.: FuseDream: training-free text-to-image generation with improved CLIP+GAN space optimization. arXiv:abs\/2112.01573 (2021)"},{"key":"20_CR24","unstructured":"Lundberg, S.M., Lee, S.I.: A unified approach to interpreting model predictions. In: Advances in Neural Information Processing Systems, pp. 4765\u20134774 (2017)"},{"key":"20_CR25","doi-asserted-by":"crossref","unstructured":"Michel, O.J., Bar-On, R., Liu, R., Benaim, S., Hanocka, R.: Text2mesh: text-driven neural stylization for meshes. arXiv:abs\/2112.03221 (2021)","DOI":"10.1109\/CVPR52688.2022.01313"},{"key":"20_CR26","doi-asserted-by":"crossref","unstructured":"Omri Avrahami, D.L., Friedn, O.: Blended diffusion for text-driven editing of natural images. arXiv preprint arxiv:2111.14818 (2021)","DOI":"10.1109\/CVPR52688.2022.01767"},{"issue":"1","key":"20_CR27","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1109\/TSMC.1979.4310076","volume":"9","author":"N Otsu","year":"1979","unstructured":"Otsu, N.: A threshold selection method from gray-level histograms. IEEE Trans. Syst. Man Cybern. 9(1), 62\u201366 (1979)","journal-title":"IEEE Trans. Syst. Man Cybern."},{"key":"20_CR28","doi-asserted-by":"crossref","unstructured":"Patashnik, O., Wu, Z., Shechtman, E., Cohen-Or, D., Lischinski, D.: StyleCLIP: text-driven manipulation of StyleGAN imagery. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2085\u20132094 (2021)","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"20_CR29","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. arXiv preprint arXiv:2103.00020 (2021)"},{"issue":"8","key":"20_CR30","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"20_CR31","unstructured":"Recht, B., Roelofs, R., Schmidt, L., Shankar, V.: Do ImageNet classifiers generalize to ImageNet? In: Chaudhuri, K., Salakhutdinov, R. (eds.) Proceedings of the 36th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 97, pp. 5389\u20135400. PMLR, 09\u201315 June 2019. https:\/\/proceedings.mlr.press\/v97\/recht19a.html"},{"key":"20_CR32","doi-asserted-by":"publisher","unstructured":"Rubinstein, M., Joulin, A., Kopf, J., Liu, C.: Unsupervised joint object discovery and segmentation in internet images. In: 2013 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1939\u20131946 (2013). https:\/\/doi.org\/10.1109\/CVPR.2013.253","DOI":"10.1109\/CVPR.2013.253"},{"key":"20_CR33","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-CAM: visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 618\u2013626 (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"20_CR34","unstructured":"Shrikumar, A., Greenside, P., Shcherbina, A., Kundaje, A.: Not just a black box: learning important features through propagating activation differences. arXiv preprint arXiv:1605.01713 (2016)"},{"key":"20_CR35","doi-asserted-by":"publisher","unstructured":"Tang, K., Joulin, A., Li, L.J., Fei-Fei, L.: Co-localization in real-world images. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1464\u20131471 (2014). https:\/\/doi.org\/10.1109\/CVPR.2014.190","DOI":"10.1109\/CVPR.2014.190"},{"key":"20_CR36","unstructured":"Tewel, Y., Shalev, Y., Schwartz, I., Wolf, L.: Zero-shot image-to-text generation for visual-semantic arithmetic. CoRR abs\/2111.14447 (2021). arXiv:abs\/2111.14447"},{"key":"20_CR37","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"20_CR38","doi-asserted-by":"publisher","unstructured":"Vijayanarasimhan, S., Grauman, K.: Keywords to visual categories: multiple-instance learning for weakly supervised object categorization. In: 2008 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20138 (2008). https:\/\/doi.org\/10.1109\/CVPR.2008.4587632","DOI":"10.1109\/CVPR.2008.4587632"},{"key":"20_CR39","unstructured":"Wang, H., Ge, S., Lipton, Z., Xing, E.P.: Learning robust global representations by penalizing local predictive power. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 32. Curran Associates, Inc. (2019). https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/3eefceb8087e964f89c2d59e8a249915-Paper.pdf"},{"key":"20_CR40","doi-asserted-by":"publisher","unstructured":"Wang, X.J., Zhang, L., Li, X., Ma, W.Y.: Annotating images by mining image search results. IEEE Trans. Pattern Anal. Mach. Intell. 30(11), 1919\u20131932 (2008). https:\/\/doi.org\/10.1109\/TPAMI.2008.127","DOI":"10.1109\/TPAMI.2008.127"},{"key":"20_CR41","unstructured":"Zabari, N., Hoshen, Y.: Semantic segmentation in-the-wild without seeing any segmentation examples. arXiv:abs\/2112.03185 (2021)"},{"key":"20_CR42","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. arXiv preprint arXiv:2109.01134 (2021)"},{"key":"20_CR43","unstructured":"Zhu, P., Abdal, R., Femiani, J.C., Wonka, P.: Mind the gap: domain gap control for single shot domain adaptation for generative adversarial networks. arXiv:abs\/2110.08398 (2021)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19775-8_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T13:41:47Z","timestamp":1710337307000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19775-8_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031197741","9783031197758"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19775-8_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"23 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}