{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T00:54:09Z","timestamp":1767315249905,"version":"3.48.0"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032101846","type":"print"},{"value":"9783032101853","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-10185-3_14","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T00:49:07Z","timestamp":1767314947000},"page":"168-180","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["There is no Model to\u00a0Beat Them All: Recommendations for\u00a0Deep Learning Model Selection When Training on\u00a0Synthetic Images"],"prefix":"10.1007","author":[{"given":"Joachim","family":"R\u00fcter","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Johann C.","family":"Dauer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Umut","family":"Durak","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"14_CR1","doi-asserted-by":"crossref","unstructured":"Beery, S., et al.: Synthetic examples improve generalization for rare classes. In: IEEE\/CVF Winter Conference on Applications of Computer Vision (2020)","DOI":"10.1109\/WACV45572.2020.9093570"},{"key":"14_CR2","unstructured":"Cabon, Y., Murray, N., Humenberger, M.: Virtual KITTI 2. arXiv preprint arXiv:2001.10773 (2020)"},{"key":"14_CR3","unstructured":"Chen, L.C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587 (2017)"},{"key":"14_CR4","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"14_CR5","unstructured":"Chu, X., et al.: Twins: revisiting the design of spatial attention in vision transformers. Adv. Neural Inf. Process. Syst. 34 (2021)"},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Gaidon, A., Wang, Q., Cabon, Y., Vig, E.: Virtual worlds as proxy for multi-object tracking analysis. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.470"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Stiller, C., Urtasun, R.: Vision meets robotics: the kitti dataset. Int. J. Rob. Res. 32(11) (2013)","DOI":"10.1177\/0278364913491297"},{"key":"14_CR8","unstructured":"Geirhos, R., Rubisch, P., Michaelis, C., Bethge, M., Wichmann, F.A., Brendel, W.: Imagenet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness. arXiv preprint arXiv:1811.12231 (2018)"},{"key":"14_CR9","doi-asserted-by":"crossref","unstructured":"Han, K., et\u00a0al.: A survey on vision transformer. IEEE Trans. Pattern Anal. Mach. Intell. 45(1) (2022)","DOI":"10.1109\/TPAMI.2022.3152247"},{"key":"14_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Hinniger, C., R\u00fcter, J.: Synthetic training data for semantic segmentation of the environment from uav perspective. Aerospace 10(7) (2023)","DOI":"10.3390\/aerospace10070604"},{"key":"14_CR12","doi-asserted-by":"crossref","unstructured":"Howard, A., et\u00a0al.: Searching for mobilenetv3. In: IEEE\/CVF International Conference on Computer Vision (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"14_CR13","doi-asserted-by":"crossref","unstructured":"Kiefer, B., Ott, D., Zell, A.: Leveraging synthetic data in object detection on unmanned aerial vehicles. In: 26th International Conference on Pattern Recognition (2022)","DOI":"10.1109\/ICPR56361.2022.9956710"},{"key":"14_CR14","doi-asserted-by":"crossref","unstructured":"Kre\u0161o, I., \u010cau\u0161evi\u0107, D., Krapac, J., \u0160egvi\u0107, S.: Convolutional scale invariance for semantic segmentation. In: German Conference on Pattern Recognition (2016)","DOI":"10.1007\/978-3-319-45886-1_6"},{"key":"14_CR15","doi-asserted-by":"crossref","unstructured":"Krump, M., St\u00fctz, P.: UAV based vehicle detection on real and synthetic image pairs: performance differences and influence analysis of context and simulation parameters. In: MESAS (2022)","DOI":"10.1007\/978-3-030-98260-7_1"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: IEEE\/CVF International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.Y., Feichtenhofer, C., Darrell, T., Xie, S.: A convnet for the 2020s. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: IEEE Conference on Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"14_CR19","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2019)"},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"Marcu, A., Licaret, V., Costea, D., Leordeanu, M.: Semantics through time: semi-supervised segmentation of aerial videos with iterative label propagation. In: Asian Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-69525-5_32"},{"key":"14_CR21","unstructured":"Naseer, M.M., Ranasinghe, K., Khan, S.H., Hayat, M., Shahbaz\u00a0Khan, F., Yang, M.H.: Intriguing properties of vision transformers. Adv. Neural Inf. Process. Syst. 34 (2021)"},{"key":"14_CR22","unstructured":"Nowruzi, F.E., Kapoor, P., Kolhatkar, D., Hassanat, F.A., Laganiere, R., Rebut, J.: How much real data do we actually need: analyzing object detection performance using synthetic and real data. arXiv preprint arXiv:1907.07061v1 (2019)"},{"key":"14_CR23","unstructured":"Park, N., Kim, S.: How do vision transformers work? arXiv preprint arXiv:2202.06709 (2022)"},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Richter, S.R., Vineet, V., Roth, S., Koltun, V.: Playing for data: ground truth from computer games. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46475-6_7"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"Ros, G., Sellart, L., Materzynska, J., Vazquez, D., Lopez, A.M.: The SYNTHIA dataset: a large collection of synthetic images for semantic segmentation of urban scenes. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.352"},{"key":"14_CR26","doi-asserted-by":"crossref","unstructured":"Ruis, F.A., et al.: Improving object detector training on synthetic data by starting with a strong baseline methodology. In: Synthetic Data for Artificial Intelligence and Machine Learning, Tools, Techniques, and Applications II (2024)","DOI":"10.1117\/12.3013441"},{"key":"14_CR27","doi-asserted-by":"crossref","unstructured":"R\u00fcter, J., Durak, U., Dauer, J.C.: Investigating the influence of image augmentations on the sim-to-real generalization of deep learning perception models. In: Synthetic Data for Computer Vision Workshop @CVPR (2025)","DOI":"10.3390\/jimaging10100259"},{"key":"14_CR28","doi-asserted-by":"crossref","unstructured":"R\u00fcter, J., Maienschein, T., Schirmer, S., Schopferer, S., Torens, C.: Filling the gaps: using synthetic low-altitude aerial images to increase operational design domain coverage. Sensors 24(4) (2024)","DOI":"10.3390\/s24041144"},{"key":"14_CR29","doi-asserted-by":"crossref","unstructured":"R\u00fcter, J., Durak, U., Dauer, J.C.: Investigating the sim-to-real generalizability of deep learning object detection models. J. Imaging 10(10) (2024)","DOI":"10.3390\/jimaging10100259"},{"key":"14_CR30","doi-asserted-by":"crossref","unstructured":"R\u00fcter, J., Schmidt, R.: Using only synthetic images to train a drogue detector for aerial refueling. In: MESAS (2023)","DOI":"10.1007\/978-3-031-71397-2_25"},{"key":"14_CR31","doi-asserted-by":"crossref","unstructured":"Saleh, F.S., Aliakbarian, M.S., Salzmann, M., Petersson, L., Alvarez, J.M.: Effective use of synthetic data for urban scene semantic segmentation. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01216-8_6"},{"key":"14_CR32","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: Mobilenetv2: inverted residuals and linear bottlenecks. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"14_CR33","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning (2021)"},{"key":"14_CR34","unstructured":"Wilson, A.C., Roelofs, R., Stern, M., Srebro, N., Recht, B.: The marginal value of adaptive gradient methods in machine learning. arXiv preprint arXiv:1705.08292 (2018)"},{"key":"14_CR35","unstructured":"Wrenninge, M., Unger, J.: Synscapes: a photorealistic synthetic dataset for street scene parsing. arXiv preprint arXiv:1810.08705 (2018)"},{"key":"14_CR36","doi-asserted-by":"crossref","unstructured":"Xiao, T., Liu, Y., Zhou, B., Jiang, Y., Sun, J.: Unified perceptual parsing for scene understanding. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01228-1_26"},{"key":"14_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, H., et\u00a0al.: Resnest: split-attention networks. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPRW56347.2022.00309"},{"key":"14_CR38","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.660"},{"key":"14_CR39","doi-asserted-by":"crossref","unstructured":"Zhou, B., et al.: Semantic understanding of scenes through the ADE20K dataset. Int. J. Comput. Vision 127 (2019)","DOI":"10.1007\/s11263-018-1140-0"}],"container-title":["Lecture Notes in Computer Science","Image Analysis and Processing \u2013 ICIAP 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-10185-3_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T00:49:10Z","timestamp":1767314950000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-10185-3_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032101846","9783032101853"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-10185-3_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Image Analysis and Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Rome","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iciap2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iciap.org\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}