{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:13:34Z","timestamp":1757312014115,"version":"3.40.3"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031728891"},{"type":"electronic","value":"9783031728907"}],"license":[{"start":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T00:00:00Z","timestamp":1733529600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T00:00:00Z","timestamp":1733529600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72890-7_14","type":"book-chapter","created":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T19:45:06Z","timestamp":1733514306000},"page":"231-247","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Lost in Translation: Modern Neural Networks Still Struggle with Small Realistic Image Transformations"],"prefix":"10.1007","author":[{"given":"Ofir","family":"Shifman","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yair","family":"Weiss","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,7]]},"reference":[{"key":"14_CR1","unstructured":"Awais, M., et al.: Foundational models defining a new era in vision: a survey and outlook. arXiv preprint arXiv:2307.13721 (2023)"},{"key":"14_CR2","unstructured":"Azulay, A., Weiss, Y.: Why do deep convolutional networks generalize so poorly to small image transformations? arXiv preprint arXiv:1805.12177 (2018)"},{"key":"14_CR3","unstructured":"Bommasani, R., et\u00a0al.: On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)"},{"key":"14_CR4","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"14_CR5","doi-asserted-by":"crossref","unstructured":"Chaman, A., Dokmanic, I.: Truly shift-invariant convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3773\u20133783 (2021)","DOI":"10.1109\/CVPR46437.2021.00377"},{"key":"14_CR6","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"14_CR7","unstructured":"Engstrom, L., Tsipras, D., Schmidt, L., Madry, A.: A rotation and a translation suffice: fooling CNNs with simple transformations (2017)"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Fang, Y., et al.: Eva: exploring the limits of masked visual representation learning at scale. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19358\u201319369 (2023)","DOI":"10.1109\/CVPR52729.2023.01855"},{"key":"14_CR9","doi-asserted-by":"publisher","unstructured":"Fonaryov, M., Lindenbaum, M.: On the minimal recognizable image patch. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 6734\u20136741 (2021). https:\/\/doi.org\/10.1109\/ICPR48806.2021.9412064","DOI":"10.1109\/ICPR48806.2021.9412064"},{"key":"14_CR10","doi-asserted-by":"publisher","first-page":"826","DOI":"10.1109\/TSMC.1983.6313076","volume":"5","author":"K Fukushima","year":"1983","unstructured":"Fukushima, K., Miyake, S., Ito, T.: Neocognitron: a neural network model for a mechanism of visual pattern recognition. IEEE Trans. Syst. Man Cybern. 5, 826\u2013834 (1983)","journal-title":"IEEE Trans. Syst. Man Cybern."},{"issue":"11","key":"14_CR11","doi-asserted-by":"publisher","first-page":"665","DOI":"10.1038\/s42256-020-00257-z","volume":"2","author":"R Geirhos","year":"2020","unstructured":"Geirhos, R., et al.: Shortcut learning in deep neural networks. Nat. Mach. Intell. 2(11), 665\u2013673 (2020)","journal-title":"Nat. Mach. Intell."},{"key":"14_CR12","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"14_CR13","unstructured":"Gunasekar, S.: Generalization to translation shifts: a study in architectures and augmentations. arXiv preprint arXiv:2207.02349 (2022)"},{"key":"14_CR14","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Delving deep into rectifiers: surpassing human-level performance on imagenet classification. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1026\u20131034 (2015)","DOI":"10.1109\/ICCV.2015.123"},{"key":"14_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., et\u00a0al.: The many faces of robustness: a critical analysis of out-of-distribution generalization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8340\u20138349 (2021)","DOI":"10.1109\/ICCV48922.2021.00823"},{"key":"14_CR17","unstructured":"Hendrycks, D., Dietterich, T.: Benchmarking neural network robustness to common corruptions and perturbations. arXiv preprint arXiv:1903.12261 (2019)"},{"key":"14_CR18","unstructured":"Hendrycks, D., Gimpel, K.: A baseline for detecting misclassified and out-of-distribution examples in neural networks. arXiv preprint arXiv:1610.02136 (2016)"},{"key":"14_CR19","doi-asserted-by":"publisher","unstructured":"Ilharco, G., et al.: Openclip (2021). https:\/\/doi.org\/10.5281\/zenodo.5143773. if you use this software, please cite it as below","DOI":"10.5281\/zenodo.5143773"},{"issue":"4","key":"14_CR20","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun, Y., et al.: Backpropagation applied to handwritten zip code recognition. Neural Comput. 1(4), 541\u2013551 (1989)","journal-title":"Neural Comput."},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.Y., Feichtenhofer, C., Darrell, T., Xie, S.: A convnet for the 2020s. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11976\u201311986 (2022)","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"14_CR22","unstructured":"Madan, S., Sasaki, T., Pfister, H., Li, T.M., Boix, X.: Adversarial examples within the training distribution: a widespread challenge. arXiv preprint arXiv:2106.16198 (2021)"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Michaeli, H., Michaeli, T., Soudry, D.: Alias-free convnets: fractional shift invariance via polynomial activations (2023)","DOI":"10.1109\/CVPR52729.2023.01567"},{"key":"14_CR24","first-page":"23296","volume":"34","author":"MM Naseer","year":"2021","unstructured":"Naseer, M.M., Ranasinghe, K., Khan, S.H., Hayat, M., Shahbaz Khan, F., Yang, M.H.: Intriguing properties of vision transformers. Adv. Neural. Inf. Process. Syst. 34, 23296\u201323308 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"14_CR25","unstructured":"Oquab, M., et\u00a0al.: Dinov2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"14_CR26","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"14_CR27","unstructured":"Rahman, M.A., Yeh, R.A.: Truly scale-equivariant deep nets with Fourier layers. arXiv preprint arXiv:2311.02922 (2023)"},{"key":"14_CR28","first-page":"29935","volume":"34","author":"SA Rebuffi","year":"2021","unstructured":"Rebuffi, S.A., Gowal, S., Calian, D.A., Stimberg, F., Wiles, O., Mann, T.A.: Data augmentation can improve robustness. Adv. Neural. Inf. Process. Syst. 34, 29935\u201329948 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"14_CR29","unstructured":"Recht, B., Roelofs, R., Schmidt, L., Shankar, V.: Do imagenet classifiers generalize to imagenet? In: International Conference on Machine Learning, pp. 5389\u20135400. PMLR (2019)"},{"key":"14_CR30","doi-asserted-by":"crossref","unstructured":"Reddy, V.K., Siramoju, K.K., Sircar, P.: Object detection by 2-d continuous wavelet transform. In: 2014 International Conference on Computational Science and Computational Intelligence, vol.\u00a01, pp. 162\u2013167. IEEE (2014)","DOI":"10.1109\/CSCI.2014.34"},{"key":"14_CR31","doi-asserted-by":"crossref","unstructured":"Rojas-Gomez, R.A., Lim, T.Y., Do, M.N., Yeh, R.A.: Making vision transformers truly shift-equivariant. arXiv preprint arXiv:2305.16316 (2023)","DOI":"10.1109\/CVPR52733.2024.00532"},{"key":"14_CR32","first-page":"35755","volume":"35","author":"RA Rojas-Gomez","year":"2022","unstructured":"Rojas-Gomez, R.A., Lim, T.Y., Schwing, A., Do, M., Yeh, R.A.: Learnable polyphase sampling for shift invariant and equivariant convolutional networks. Adv. Neural. Inf. Process. Syst. 35, 35755\u201335768 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"3","key":"14_CR33","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vision 115(3), 211\u2013252 (2015). https:\/\/doi.org\/10.1007\/s11263-015-0816-y","journal-title":"Int. J. Comput. Vision"},{"key":"14_CR34","unstructured":"Sarvaiya, J.N., Patnaik, S.: Automatic image registration using Mexican hat wavelet, invariant moment, and radon transform. IJACSA Int. J. Adv. Comput. Sci. Appl. (2011). Special Issue on Image Processing and Analysis"},{"key":"14_CR35","first-page":"25278","volume":"35","author":"C Schuhmann","year":"2022","unstructured":"Schuhmann, C., et al.: LAION-5B: an open large-scale dataset for training next generation image-text models. Adv. Neural. Inf. Process. Syst. 35, 25278\u201325294 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"14_CR36","unstructured":"Srivastava, S., Ben-Yosef, G., Boix, X.: Minimal images in deep neural networks: Fragile object recognition in natural images. arXiv preprint arXiv:1902.03227 (2019)"},{"key":"14_CR37","doi-asserted-by":"crossref","unstructured":"Wortsman, M., et\u00a0al.: Robust fine-tuning of zero-shot models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7959\u20137971 (2022)","DOI":"10.1109\/CVPR52688.2022.00780"},{"key":"14_CR38","unstructured":"Wu, B., et al.: Visual transformers: token-based image representation and processing for computer vision (2020)"},{"key":"14_CR39","doi-asserted-by":"crossref","unstructured":"Wu, H., et al.: CvT: introducing convolutions to vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22\u201331 (2021)","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"14_CR40","unstructured":"Yuan, L., et\u00a0al.: Florence: a new foundation model for computer vision. arXiv preprint arXiv:2111.11432 (2021)"},{"key":"14_CR41","unstructured":"Zhang, R.: Making convolutional networks shift-invariant again (2019)"},{"issue":"1","key":"14_CR42","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1007\/s11263-022-01672-y","volume":"131","author":"X Zou","year":"2023","unstructured":"Zou, X., Xiao, F., Yu, Z., Li, Y., Lee, Y.J.: Delving deeper into anti-aliasing in convnets. Int. J. Comput. Vis. 131(1), 67\u201381 (2023)","journal-title":"Int. J. Comput. Vis."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72890-7_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T20:05:38Z","timestamp":1733515538000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72890-7_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,7]]},"ISBN":["9783031728891","9783031728907"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72890-7_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,7]]},"assertion":[{"value":"7 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}