{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T00:16:36Z","timestamp":1758672996388,"version":"3.44.0"},"publisher-location":"Cham","reference-count":52,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031851803"},{"type":"electronic","value":"9783031851810"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-85181-0_11","type":"book-chapter","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T06:58:35Z","timestamp":1745305115000},"page":"164-180","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Efficient and\u00a0Discriminative Image Feature Extraction for\u00a0Universal Image 
Retrieval"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-8425-5161","authenticated-orcid":false,"given":"Morris","family":"Florek","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5344-4172","authenticated-orcid":false,"given":"David","family":"Tschirschwitz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1019-9538","authenticated-orcid":false,"given":"Bj\u00f6rn","family":"Barz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4815-0118","authenticated-orcid":false,"given":"Volker","family":"Rodehorst","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,23]]},"reference":[{"key":"11_CR1","doi-asserted-by":"publisher","unstructured":"Almaz\u00e1n, J., Ko, B., Gu, G., Larlus, D., Kalantidis, Y.: Granularity-aware adaptation for image retrieval over multiple tasks. In: Proceedings of the European Conference on Computer Vision (ECCV) (2022). https:\/\/doi.org\/10.1007\/978-3-031-19781-9_23","DOI":"10.1007\/978-3-031-19781-9_23"},{"key":"11_CR2","unstructured":"Araujo, A., et al.: Google Universal Image Embedding (2022). https:\/\/kaggle.com\/competitions\/google-universal-image-embedding"},{"key":"11_CR3","doi-asserted-by":"publisher","unstructured":"Bai, Y., Chen, Y., Yu, W., Wang, L., Zhang, W.: Products\u201310K: a Large-scale Product Recognition Dataset. arXiv preprint arXiv:2008.10545 (2020). https:\/\/doi.org\/10.48550\/arXiv.2008.10545","DOI":"10.48550\/arXiv.2008.10545"},{"key":"11_CR4","unstructured":"Bommasani, R., et al.: On the Opportunities and Risks of Foundation Models. arXiv preprint arXiv:2108.07258 (2022)"},{"key":"11_CR5","doi-asserted-by":"publisher","unstructured":"Bossard, L., Guillaumin, M., Van\u00a0Gool, L.: Food-101 \u2013 mining discriminative components with random forests. In: Proceedings of the European Conference on Computer Vision (ECCV) (2014). 
https:\/\/doi.org\/10.1007\/978-3-319-10599-4_29","DOI":"10.1007\/978-3-319-10599-4_29"},{"key":"11_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"726","DOI":"10.1007\/978-3-030-58565-5_43","volume-title":"Computer Vision \u2013 ECCV 2020","author":"B Cao","year":"2020","unstructured":"Cao, B., Araujo, A., Sim, J.: Unifying deep local and global features for image search. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12365, pp. 726\u2013743. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58565-5_43"},{"key":"11_CR7","unstructured":"Chen, X., et al.: PaLI: a jointly-scaled multilingual language-image model. In: Proceedings of the International Conference on Learning Representations (ICLR) (2023)"},{"key":"11_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"741","DOI":"10.1007\/978-3-030-58621-8_43","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Deng","year":"2020","unstructured":"Deng, J., Guo, J., Liu, T., Gong, M., Zafeiriou, S.: Sub-center ArcFace: boosting face recognition by large-scale noisy web faces. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12356, pp. 741\u2013757. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58621-8_43"},{"key":"11_CR9","doi-asserted-by":"crossref","unstructured":"Deng, J., Guo, J., Xue, N., Zafeiriou, S.: ArcFace: additive angular margin loss for deep face recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00482","DOI":"10.1109\/CVPR.2019.00482"},{"key":"11_CR10","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. 
In: Proceedings of the International Conference on Learning Representations (ICLR) (2021)"},{"key":"11_CR11","doi-asserted-by":"publisher","unstructured":"Feng, Y., et al.: Unifying Specialist Image Embedding into Universal Image Embedding. arXiv preprint arXiv:2003.03701 (2020). https:\/\/doi.org\/10.48550\/arXiv.2003.03701","DOI":"10.48550\/arXiv.2003.03701"},{"key":"11_CR12","unstructured":"Gadre, S.Y., et al.: DataComp: in search of the next generation of multimodal datasets. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) Track on Datasets and Benchmarks (2023)"},{"key":"11_CR13","unstructured":"Garc\u00eda\u00a0Ling, C., HMGroup, E., Rim, F., inversion, Ferrando, J., Maggie, neuraloverflow, xlsrln: H&M Personalized Fashion Recommendations (2022). https:\/\/kaggle.com\/competitions\/h-and-m-personalized-fashion-recommendations"},{"key":"11_CR14","doi-asserted-by":"publisher","unstructured":"Ge, Y., Zhang, R., Wang, X., Tang, X., Luo, P.: DeepFashion2: a versatile benchmark for detection, pose estimation, segmentation and re-identification of clothing images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00548","DOI":"10.1109\/CVPR.2019.00548"},{"key":"11_CR15","doi-asserted-by":"publisher","unstructured":"Han, X., et al.: Automatic Spatially-Aware Fashion Concept Discovery. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV) (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.163","DOI":"10.1109\/ICCV.2017.163"},{"key":"11_CR16","unstructured":"Howard, A., Liew, C., Wong, M., Dane, S.: Shopee - Price Match Guarantee (2021). https:\/\/kaggle.com\/competitions\/shopee-product-matching"},{"key":"11_CR17","doi-asserted-by":"publisher","unstructured":"Huang, X., Li, Q.: 2nd Place Solution to Google Universal Image Embedding. arXiv preprint arXiv:2210.08735 (2022). 
https:\/\/doi.org\/10.48550\/arXiv.2210.08735","DOI":"10.48550\/arXiv.2210.08735"},{"key":"11_CR18","doi-asserted-by":"crossref","unstructured":"Huang, Y., et al.: CurricularFace: adaptive curriculum learning loss for deep face recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00594"},{"key":"11_CR19","doi-asserted-by":"publisher","unstructured":"Ilharco, G., et al.: OpenCLIP (2021). https:\/\/doi.org\/10.5281\/zenodo.5143773","DOI":"10.5281\/zenodo.5143773"},{"key":"11_CR20","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2012.4","author":"AK Jain","year":"2012","unstructured":"Jain, A.K., Klare, B., Park, U.: Face matching and retrieval in forensics applications. IEEE Multimedia (2012). https:\/\/doi.org\/10.1109\/MMUL.2012.4","journal-title":"IEEE Multimedia"},{"key":"11_CR21","doi-asserted-by":"crossref","unstructured":"Kim, M., Jain, A.K., Liu, X.: AdaFace: quality adaptive margin for face recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022)","DOI":"10.1109\/CVPR52688.2022.01819"},{"key":"11_CR22","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"11_CR23","unstructured":"Koo: 10th Place Solution of the Google Universal Image Embedding Challenge (2022). https:\/\/www.kaggle.com\/competitions\/google-universal-image-embedding\/discussion\/359271"},{"key":"11_CR24","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., Fei-Fei, L.: 3D object representations for fine-grained categorization. 
In: Proceedings of the IEEE International Conference on Computer Vision (ICCV) Workshops (2013)","DOI":"10.1109\/ICCVW.2013.77"},{"key":"11_CR25","unstructured":"Li, X., Wang, Z., Xie, C.: An inverse scaling law for CLIP training. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) (2023)"},{"key":"11_CR26","doi-asserted-by":"crossref","unstructured":"Li, X., Wang, F., Hu, Q., Leng, C.: AirFace: lightweight and efficient model for face recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops (2019)","DOI":"10.1109\/ICCVW.2019.00327"},{"key":"11_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.07.139","author":"X Li","year":"2021","unstructured":"Li, X., Yang, J., Ma, J.: Recent developments of content-based image retrieval (CBIR). Neurocomputing (2021). https:\/\/doi.org\/10.1016\/j.neucom.2020.07.139","journal-title":"Neurocomputing"},{"key":"11_CR28","doi-asserted-by":"crossref","unstructured":"Liu, Z., Luo, P., Qiu, S., Wang, X., Tang, X.: DeepFashion: powering robust clothes recognition and retrieval with rich annotations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.124"},{"key":"11_CR29","unstructured":"Mohanty, S., Khandelwal, S.: AIcrowd | Food Recognition Benchmark 2022 | Challenges (2022). https:\/\/www.aicrowd.com\/challenges\/food-recognition-benchmark-2022"},{"key":"11_CR30","doi-asserted-by":"crossref","unstructured":"Noh, H., Araujo, A., Sim, J., Weyand, T., Han, B.: Large-Scale Image Retrieval With Attentive Deep Local Features. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.374"},{"key":"11_CR31","unstructured":"Oquab, M., et al.: DINOv2: learning robust visual features without supervision. Trans. Mach. Learn. Res. 
(TMLR) (2024)"},{"key":"11_CR32","doi-asserted-by":"publisher","unstructured":"Ota, N., Yokoi, S., Yamaoka, S.: 5th place solution to Kaggle Google Universal Image Embedding competition. arXiv preprint arXiv:2210.09495 (2022). https:\/\/doi.org\/10.48550\/arXiv.2210.09495","DOI":"10.48550\/arXiv.2210.09495"},{"key":"11_CR33","doi-asserted-by":"publisher","unstructured":"Panetta, K., Kezebou, L., Oludare, V., Intriligator, J., Agaian, S.: Artificial intelligence for text-based vehicle search, recognition, and continuous localization in traffic videos. AI (2021). https:\/\/doi.org\/10.3390\/ai2040041","DOI":"10.3390\/ai2040041"},{"key":"11_CR34","doi-asserted-by":"publisher","unstructured":"Peng, J., Xiao, C., Li, Y.: RP2K: a large-scale retail product dataset for fine-grained image classification. arXiv preprint arXiv:2006.12634 (2021). https:\/\/doi.org\/10.48550\/arXiv.2006.12634","DOI":"10.48550\/arXiv.2006.12634"},{"key":"11_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.05.025","author":"A Qayyum","year":"2017","unstructured":"Qayyum, A., Anwar, S.M., Awais, M., Majid, M.: Medical image retrieval using deep convolutional neural network. Neurocomputing (2017). https:\/\/doi.org\/10.1016\/j.neucom.2017.05.025","journal-title":"Neurocomputing"},{"key":"11_CR36","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Proceedings of the International Conference on Machine Learning (ICML) (2021)"},{"key":"11_CR37","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. Inter. J. Comput. Vis. (IJCV) (2015). https:\/\/doi.org\/10.1007\/s11263-015-0816-y","journal-title":"Inter. J. Comput. Vis. 
(IJCV)"},{"key":"11_CR38","doi-asserted-by":"publisher","unstructured":"Schall, K., Barthel, K.U., Hezel, N., Jung, K.: GPR1200: a benchmark for general-purpose content-based image retrieval. In: Proceedings of the International Conference on Multimedia Modeling (MMM) (2022). https:\/\/doi.org\/10.1007\/978-3-030-98358-1_17","DOI":"10.1007\/978-3-030-98358-1_17"},{"key":"11_CR39","unstructured":"Schuhmann, C., et al.: LAION-5B: an open large-scale dataset for training next generation image-text models. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) Track on Datasets and Benchmarks (2022)"},{"key":"11_CR40","doi-asserted-by":"publisher","unstructured":"Shao, S., Cui, Q.: 1st Place Solution in Google Universal Images Embedding. arXiv preprint arXiv:2210.08473 (2022). https:\/\/doi.org\/10.48550\/arXiv.2210.08473","DOI":"10.48550\/arXiv.2210.08473"},{"key":"11_CR41","doi-asserted-by":"publisher","unstructured":"Song, H.O., Xiang, Y., Jegelka, S., Savarese, S.: Deep metric learning via lifted structured feature embedding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.434","DOI":"10.1109\/CVPR.2016.434"},{"key":"11_CR42","doi-asserted-by":"publisher","unstructured":"Sun, Q., Fang, Y., Wu, L., Wang, X., Cao, Y.: EVA-CLIP: improved Training Techniques for CLIP at Scale. arXiv preprint arXiv:2303.15389 (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.15389","DOI":"10.48550\/arXiv.2303.15389"},{"key":"11_CR43","unstructured":"Tan, M., Le, Q.: EfficientNet: rethinking model scaling for convolutional neural networks. In: Proceedings of the International Conference on Machine Learning (ICML) (2019)"},{"key":"11_CR44","doi-asserted-by":"publisher","unstructured":"Wang, S., Jiang, S.: INSTRE: a new benchmark for instance-level object retrieval and recognition. ACM Trans. Multimedia Comput. Commun. Appl. (TOMM) (2015). 
https:\/\/doi.org\/10.1145\/2700292","DOI":"10.1145\/2700292"},{"key":"11_CR45","doi-asserted-by":"crossref","unstructured":"Weyand, T., Araujo, A., Cao, B., Sim, J.: Google landmarks dataset v2 \u2013 a large-scale benchmark for instance-level recognition and retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00265"},{"key":"11_CR46","doi-asserted-by":"publisher","unstructured":"Xu, H., et al.: Demystifying CLIP Data. arXiv preprint arXiv:2309.16671 (2023). https:\/\/doi.org\/10.48550\/arXiv.2309.16671","DOI":"10.48550\/arXiv.2309.16671"},{"key":"11_CR47","doi-asserted-by":"crossref","unstructured":"Ypsilantis, N.A., et al.: Towards universal image embeddings: a large-scale dataset and challenge for generic image representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCV51070.2023.01037"},{"key":"11_CR48","unstructured":"Ypsilantis, N.A., Garcia, N., Han, G., Ibrahimi, S., Noord, N.V., Tolias, G.: The Met Dataset: Instance-level Recognition for Artworks. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) Track on Datasets and Benchmarks (2021)"},{"key":"11_CR49","doi-asserted-by":"crossref","unstructured":"Zhai, X., Mustafa, B., Kolesnikov, A., Beyer, L.: Sigmoid Loss for Language Image Pre-Training. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCV51070.2023.01100"},{"key":"11_CR50","doi-asserted-by":"publisher","unstructured":"Zhang, X., Zhao, R., Qiao, Y., Wang, X., Li, H.: AdaCos: adaptively scaling cosine logits for effectively learning deep face representations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019). 
https:\/\/doi.org\/10.1109\/CVPR.2019.01108","DOI":"10.1109\/CVPR.2019.01108"},{"key":"11_CR51","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2017.2712798","author":"X Zhang","year":"2018","unstructured":"Zhang, X., Wang, S., Li, Z., Ma, S.: Landmark Image Retrieval by Jointing Feature Refinement and Multimodal Classifier Learning. IEEE Transactions on Cybernetics (2018). https:\/\/doi.org\/10.1109\/TCYB.2017.2712798","journal-title":"IEEE Transactions on Cybernetics"},{"key":"11_CR52","doi-asserted-by":"publisher","unstructured":"Zhang, Y., et al.: Visual search at alibaba. In: Proceedings of the ACM International Conference on Knowledge Discovery & Data Mining (KDD) (2018). https:\/\/doi.org\/10.1145\/3219819.3219820","DOI":"10.1145\/3219819.3219820"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-85181-0_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,23]],"date-time":"2025-09-23T13:16:08Z","timestamp":1758633368000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-85181-0_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031851803","9783031851810"],"references-count":52,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-85181-0_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"23 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}