{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T19:33:07Z","timestamp":1772825587909,"version":"3.50.1"},"reference-count":111,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,3,18]],"date-time":"2025-03-18T00:00:00Z","timestamp":1742256000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,3,18]],"date-time":"2025-03-18T00:00:00Z","timestamp":1742256000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"DOI":"10.1007\/s11063-025-11742-0","type":"journal-article","created":{"date-parts":[[2025,3,18]],"date-time":"2025-03-18T11:23:10Z","timestamp":1742296990000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A Review: One-Shot Object Detection Methods for Conditional Detection of Retail and Warehouse Products"],"prefix":"10.1007","volume":"57","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6533-2477","authenticated-orcid":false,"given":"Matthieu","family":"Desmarescaux","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9014-3053","authenticated-orcid":false,"given":"Wissam","family":"Kaddah","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3965-4648","authenticated-orcid":false,"given":"Ayman","family":"Alfalou","sequence":"additional","affiliation":[]},{"given":"Jean-Charles","family":"Deconninck","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,18]]},"reference":[{"key":"11742_CR1","unstructured":"Agarap AF (2018) Deep learning using rectified linear units (relu). arXiv:1803.08375"},{"key":"11742_CR2","doi-asserted-by":"publisher","unstructured":"Albawi S, Mohammed TA, Al-Zawi S (2017) Understanding of a convolutional neural network. In: 2017 International conference on engineering and technology (ICET), pp 1\u20136. https:\/\/doi.org\/10.1109\/ICEngTechnol.2017.8308186","DOI":"10.1109\/ICEngTechnol.2017.8308186"},{"key":"11742_CR3","doi-asserted-by":"publisher","unstructured":"Bay H, Tuytelaars T, Van Gool L (2006) SURF: speeded up robust features. In: Leonardis A, Bischof H, Pinz A (eds) Computer vision\u2014ECCV 2006. Lecture notes in computer science. Springer, Berlin, pp 404\u2013417. https:\/\/doi.org\/10.1007\/11744023_32","DOI":"10.1007\/11744023_32"},{"key":"11742_CR4","doi-asserted-by":"publisher","unstructured":"Bertinetto L, Valmadre J, Henriques JF et al (2016) Fully-convolutional Siamese networks for object tracking. In: Hua G, J\u00e9gou H (eds) Computer vision\u2014ECCV 2016 workshops. Lecture Notes in Computer Science. Springer, Cham, pp 850\u2013865. https:\/\/doi.org\/10.1007\/978-3-319-48881-3_56","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"11742_CR5","unstructured":"Bochkovskiy A, Wang CY, Liao HYM (2020) YOLOv4: optimal speed and accuracy of object detection. arXiv:2004.10934 [cs, eess]"},{"key":"11742_CR6","doi-asserted-by":"publisher","unstructured":"Cai Z, Vasconcelos N (2018) Cascade R-CNN: delving into high quality object detection. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 6154\u20136162. https:\/\/doi.org\/10.1109\/CVPR.2018.00644","DOI":"10.1109\/CVPR.2018.00644"},{"key":"11742_CR7","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G et\u00a0al (2020) End-to-end object detection with transformers. arXiv:2005.12872 [cs]","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"11742_CR8","doi-asserted-by":"publisher","unstructured":"Chen DJ, Hsieh HY, Liu TL (2021) Adaptive image transformer for one-shot object detection. In: 2021 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 12242\u201312251. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01207","DOI":"10.1109\/CVPR46437.2021.01207"},{"key":"11742_CR9","doi-asserted-by":"publisher","unstructured":"Chen P, Zhang Y, Li Z et\u00a0al (2022) Few-shot incremental learning for label-to-image translation. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 3687\u20133697. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00368","DOI":"10.1109\/CVPR52688.2022.00368"},{"key":"11742_CR10","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1109\/TMM.2021.3125195","volume":"25","author":"TI Chen","year":"2023","unstructured":"Chen TI, Liu YC, Su HT et al (2023) Dual-awareness attention for few-shot object detection. IEEE Trans Multim 25:291\u2013301. https:\/\/doi.org\/10.1109\/TMM.2021.3125195","journal-title":"IEEE Trans Multim"},{"key":"11742_CR11","unstructured":"Dai J, Li Y, He K et\u00a0al (2016) R-FCN: object detection via region-based fully convolutional networks. In: Advances in neural information processing systems, vol\u00a029. Curran Associates, Inc"},{"key":"11742_CR12","unstructured":"Demirel B, Cinbis RG, Ikizler-Cinbis N (2018) Zero-shot object detection by hybrid region embedding. arXiv:1805.06157 [cs]"},{"issue":"2","key":"11742_CR13","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CKI et al (2010) The Pascal visual object classes (VOC) challenge. Int J Comput Vis 88(2):303\u2013338. https:\/\/doi.org\/10.1007\/s11263-009-0275-4","journal-title":"Int J Comput Vis"},{"key":"11742_CR14","doi-asserted-by":"publisher","unstructured":"Fan Q, Zhuo W, Tang CK et\u00a0al (2020) Few-shot object detection with attention-RPN and multi-relation detector. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 4012\u20134021. https:\/\/doi.org\/10.1109\/CVPR42600.2020.00407","DOI":"10.1109\/CVPR42600.2020.00407"},{"key":"11742_CR15","unstructured":"Fang Y, Liao B, Wang X et\u00a0al (2021) You only look at one sequence: rethinking transformer in vision through object detection. In: Advances in neural information processing systems, vol\u00a034. Curran Associates, Inc., pp 26183\u201326197"},{"key":"11742_CR16","doi-asserted-by":"crossref","unstructured":"Follmann P, Bottger T, Hartinger P et\u00a0al (2018) MVTec D2S: densely segmented supermarket dataset, pp 569\u2013585","DOI":"10.1007\/978-3-030-01249-6_35"},{"key":"11742_CR17","doi-asserted-by":"publisher","first-page":"77597","DOI":"10.1109\/ACCESS.2019.2922438","volume":"7","author":"K Fu","year":"2019","unstructured":"Fu K, Zhang T, Zhang Y et al (2019) Meta-SSD: towards fast adaptation for few-shot object detection with meta-learning. IEEE Access 7:77597\u201377606. https:\/\/doi.org\/10.1109\/ACCESS.2019.2922438","journal-title":"IEEE Access"},{"key":"11742_CR18","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1016\/j.neucom.2020.04.092","volume":"425","author":"K Fu","year":"2021","unstructured":"Fu K, Zhang T, Zhang Y et al (2021) OSCD: a one-shot conditional object detection framework. Neurocomputing 425:243\u2013255. https:\/\/doi.org\/10.1016\/j.neucom.2020.04.092","journal-title":"Neurocomputing"},{"key":"11742_CR19","unstructured":"Ge Z, Liu S, Wang F et\u00a0al (2021) YOLOX: exceeding YOLO series in 2021. arXiv:2107.08430 [cs]"},{"key":"11742_CR20","doi-asserted-by":"publisher","unstructured":"George M, Floerkemeier C (2014) Recognizing products: a per-exemplar multi-label image classification approach. In: Fleet D, Pajdla T, Schiele B, et\u00a0al (eds) Computer vision\u2014ECCV 2014. Lecture notes in computer science. Springer, Cham, pp 440\u2013455. https:\/\/doi.org\/10.1007\/978-3-319-10605-2_29","DOI":"10.1007\/978-3-319-10605-2_29"},{"key":"11742_CR21","doi-asserted-by":"publisher","unstructured":"Ghiasi G, Lin TY, Le QV (2019) NAS-FPN: learning scalable feature pyramid architecture for object detection. In: 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 7029\u20137038. https:\/\/doi.org\/10.1109\/CVPR.2019.00720, iSSN: 2575-7075","DOI":"10.1109\/CVPR.2019.00720"},{"key":"11742_CR22","doi-asserted-by":"publisher","unstructured":"Girshick R (2015) Fast R-CNN. In: 2015 IEEE international conference on computer vision (ICCV), pp 1440\u20131448. https:\/\/doi.org\/10.1109\/ICCV.2015.169","DOI":"10.1109\/ICCV.2015.169"},{"key":"11742_CR23","doi-asserted-by":"publisher","unstructured":"Girshick R, Donahue J, Darrell T et\u00a0al (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: 2014 IEEE conference on computer vision and pattern recognition, pp 580\u2013587. https:\/\/doi.org\/10.1109\/CVPR.2014.81","DOI":"10.1109\/CVPR.2014.81"},{"key":"11742_CR24","doi-asserted-by":"publisher","unstructured":"Goldman E, Herzig R, Eisenschtat A et\u00a0al (2019) Precise detection in densely packed scenes. In: 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 5222\u20135231. https:\/\/doi.org\/10.1109\/CVPR.2019.00537","DOI":"10.1109\/CVPR.2019.00537"},{"key":"11742_CR25","unstructured":"Gouda A, Roidl M (2023) DoUnseen: tuning-free class-adaptive object detection of unseen objects for robotic grasping. arXiv:2304.02833 [cs]"},{"key":"11742_CR26","doi-asserted-by":"crossref","unstructured":"Gouda A, Ghanem A, Reining C (2022) DoPose-6D dataset for object segmentation and 6D pose estimation. arXiv:2204.13613 [cs]","DOI":"10.1109\/ICMLA55696.2022.00077"},{"key":"11742_CR27","doi-asserted-by":"crossref","unstructured":"Griffin B (2022) Mobile robot manipulation using pure object detection. arXiv:2201.12437 [cs]","DOI":"10.1109\/WACV56688.2023.00063"},{"issue":"5","key":"11742_CR28","doi-asserted-by":"publisher","first-page":"2871","DOI":"10.3390\/app13052871","volume":"13","author":"V Guimar\u00e3es","year":"2023","unstructured":"Guimar\u00e3es V, Nascimento J, Viana P et al (2023) A review of recent advances and challenges in grocery label detection and recognition. Appl Sci 13(5):2871. https:\/\/doi.org\/10.3390\/app13052871","journal-title":"Appl Sci"},{"key":"11742_CR29","doi-asserted-by":"publisher","unstructured":"Han G, He Y, Huang S et\u00a0al (2021) Query adaptive few-shot object detection with heterogeneous graph convolutional networks. In: 2021 IEEE\/CVF international conference on computer vision (ICCV), pp 3243\u20133252. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00325","DOI":"10.1109\/ICCV48922.2021.00325"},{"issue":"1","key":"11742_CR30","doi-asserted-by":"publisher","first-page":"780","DOI":"10.1609\/aaai.v36i1.19959","volume":"36","author":"G Han","year":"2022","unstructured":"Han G, Huang S, Ma J et al (2022) Meta faster R-CNN: towards accurate few-shot object detection with attentive feature alignment. Proc AAAI Conf Artif Intell 36(1):780\u2013789. https:\/\/doi.org\/10.1609\/aaai.v36i1.19959","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"11742_CR31","doi-asserted-by":"crossref","unstructured":"Han G, Ma J, Huang S et\u00a0al (2022) Few-shot object detection with fully cross-transformer, pp 5321\u20135330","DOI":"10.1109\/CVPR52688.2022.00525"},{"key":"11742_CR32","unstructured":"Han Y, Zhang J, Xue Z et\u00a0al (2023) Reference twice: a simple and unified baseline for few-shot instance segmentation. arXiv:2301.01156 [cs]"},{"key":"11742_CR33","volume-title":"Neural networks: a comprehensive foundation","author":"S Haykin","year":"1994","unstructured":"Haykin S (1994) Neural networks: a comprehensive foundation. Prentice Hall PTR, Hoboken"},{"key":"11742_CR34","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S et\u00a0al (2016) Deep residual learning for image recognition. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"11742_CR35","doi-asserted-by":"publisher","unstructured":"He K, Gkioxari G, Doll\u00e1r P et\u00a0al (2017) Mask R-CNN. In: 2017 IEEE international conference on computer vision (ICCV), pp 2980\u20132988. https:\/\/doi.org\/10.1109\/ICCV.2017.32","DOI":"10.1109\/ICCV.2017.32"},{"key":"11742_CR36","unstructured":"Hsieh TI, Lo YC, Chen HT et\u00a0al (2019) One-shot object detection with co-attention and co-excitation. In: Advances in neural information processing systems, vol\u00a032. Curran Associates, Inc"},{"key":"11742_CR37","doi-asserted-by":"publisher","unstructured":"Hu H, Bai S, Li A et\u00a0al (2021) Dense relation distillation with context-aware aggregation for few-shot object detection. In: 2021 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 10180\u201310189. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01005","DOI":"10.1109\/CVPR46437.2021.01005"},{"key":"11742_CR38","doi-asserted-by":"publisher","unstructured":"Hu T, Mettes P, Huang JH et\u00a0al (2019) SILCO: show a few images, localize the common object. In: 2019 IEEE\/CVF international conference on computer vision (ICCV), pp 5066\u20135075. https:\/\/doi.org\/10.1109\/ICCV.2019.00517","DOI":"10.1109\/ICCV.2019.00517"},{"key":"11742_CR39","unstructured":"Huang Q, Zhang H, Xue M et\u00a0al (2022) A survey of deep learning for low-shot object detection. arXiv:2112.02814 [cs]"},{"key":"11742_CR40","unstructured":"Huynh D, Elhamifar E (2021) Compositional fine-grained low-shot learning. arXiv:abs\/2105.10438"},{"key":"11742_CR41","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. arXiv:1502.03167 [cs]"},{"key":"11742_CR42","unstructured":"Jocher G, Chaurasia A, Qiu J (2023) Ultralytics YOLO"},{"issue":"9","key":"11742_CR43","doi-asserted-by":"publisher","first-page":"1293","DOI":"10.1007\/s00371-018-1515-9","volume":"35","author":"W Kaddah","year":"2019","unstructured":"Kaddah W, Elbouz M, Ouerhani Y et al (2019) Optimized minimal path selection (OMPS) method for automatic and unsupervised crack segmentation within two-dimensional pavement images. Vis Comput 35(9):1293\u20131309. https:\/\/doi.org\/10.1007\/s00371-018-1515-9","journal-title":"Vis Comput"},{"key":"11742_CR44","doi-asserted-by":"publisher","unstructured":"Kaddah W, Sedgh\u00a0Gooya E, Alfalou A (2022) Smart system for blurring undesirable objects in road applications. In: Alam MS, Asari VK (eds) Pattern recognition and tracking XXXIII. SPIE, Orlando, p 4. https:\/\/doi.org\/10.1117\/12.2618745","DOI":"10.1117\/12.2618745"},{"key":"11742_CR45","doi-asserted-by":"publisher","unstructured":"Kang B, Liu Z, Wang X et\u00a0al (2019) Few-shot object detection via feature reweighting. In: 2019 IEEE\/CVF international conference on computer vision (ICCV), pp 8419\u20138428. https:\/\/doi.org\/10.1109\/ICCV.2019.00851","DOI":"10.1109\/ICCV.2019.00851"},{"key":"11742_CR46","doi-asserted-by":"publisher","unstructured":"Kaul P, Xie W, Zisserman A (2022) Label, verify, correct: a simple few shot object detection method. In: 2022 IEEE\/cvf conference on computer vision and pattern recognition (CVPR), pp 14217\u201314227. https:\/\/doi.org\/10.1109\/CVPR52688.2022.01384","DOI":"10.1109\/CVPR52688.2022.01384"},{"key":"11742_CR47","doi-asserted-by":"publisher","unstructured":"Kim G, Jung HG, Lee SW (2020) Few-shot object detection via knowledge transfer. In: 2020 IEEE international conference on systems, man, and cybernetics (SMC), pp 3564\u20133569. https:\/\/doi.org\/10.1109\/SMC42975.2020.9283497","DOI":"10.1109\/SMC42975.2020.9283497"},{"key":"11742_CR48","unstructured":"Koch G, Zemel R, Salakhutdinov R et\u00a0al (2015) Siamese neural networks for one-shot image recognition. In: ICML deep learning workshop, vol\u00a02"},{"issue":"6","key":"11742_CR49","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) ImageNet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390. https:\/\/doi.org\/10.1145\/3065386","journal-title":"Commun ACM"},{"key":"11742_CR50","doi-asserted-by":"publisher","unstructured":"K\u00f6hler M, Eisenbach M, Gross HM (2022) Few-shot object detection: a comprehensive survey. https:\/\/doi.org\/10.48550\/arXiv.2112.11699","DOI":"10.48550\/arXiv.2112.11699"},{"key":"11742_CR51","doi-asserted-by":"publisher","unstructured":"Lee H, Lee M, Kwak N (2022) Few-shot object detection by attending to per-sample-prototype. In: 2022 IEEE\/CVF winter conference on applications of computer vision (WACV). IEEE, Waikoloa, pp 1101\u20131110. https:\/\/doi.org\/10.1109\/WACV51458.2022.00117","DOI":"10.1109\/WACV51458.2022.00117"},{"key":"11742_CR52","doi-asserted-by":"publisher","unstructured":"Leutenegger S, Chli M, Siegwart RY (2011) BRISK: binary robust invariant scalable keypoints. In: 2011 international conference on computer vision, pp 2548\u20132555. https:\/\/doi.org\/10.1109\/ICCV.2011.6126542","DOI":"10.1109\/ICCV.2011.6126542"},{"key":"11742_CR53","doi-asserted-by":"publisher","unstructured":"Li B, Yan J, Wu W et\u00a0al (2018) High performance visual tracking with siamese region proposal network. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 8971\u20138980. https:\/\/doi.org\/10.1109\/CVPR.2018.00935","DOI":"10.1109\/CVPR.2018.00935"},{"key":"11742_CR54","unstructured":"Li X, Zhang L, Chen YP et\u00a0al (2020) One-shot object detection without fine-tuning. arXiv:2005.03819 [cs, eess]"},{"key":"11742_CR55","unstructured":"Li Y, Feng W, Lyu S et\u00a0al (2020b) MM-FSOD: meta and metric integrated few-shot object detection. arXiv:2012.15159 [cs]"},{"key":"11742_CR56","doi-asserted-by":"crossref","unstructured":"Lin TY, Maire M, Belongie S et\u00a0al (2015) Microsoft COCO: common objects in context. arXiv:1405.0312 [cs]","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"11742_CR57","unstructured":"Lin W, Deng Y, Gao Y et\u00a0al (2021) CAT: cross-attention transformer for one-shot object detection. arXiv:2104.14984 [cs]"},{"issue":"8","key":"11742_CR58","doi-asserted-by":"publisher","first-page":"3448","DOI":"10.1109\/TNNLS.2021.3053005","volume":"33","author":"L Liu","year":"2022","unstructured":"Liu L, Wang B, Kuang Z et al (2022) GenDet: meta learning to generate detectors from few shots. IEEE Trans Neural Netw Learn Syst 33(8):3448\u20133460. https:\/\/doi.org\/10.1109\/TNNLS.2021.3053005","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11742_CR59","doi-asserted-by":"publisher","unstructured":"Liu W, Anguelov D, Erhan D et\u00a0al (2016) SSD: single shot multibox detector. In: Leibe B, Matas J, Sebe N, et\u00a0al (eds) Computer vision\u2014ECCV 2016. Lecture Notes in Computer Science, vol 9905. Springer, Cham, p 21\u201337. https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"11742_CR60","doi-asserted-by":"publisher","unstructured":"Lowe D (1999) Object recognition from local scale-invariant features. In: Proceedings of the seventh IEEE international conference on computer vision, vol 2, pp 1150\u20131157. https:\/\/doi.org\/10.1109\/ICCV.1999.790410","DOI":"10.1109\/ICCV.1999.790410"},{"issue":"2","key":"11742_CR61","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe DG (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis 60(2):91\u2013110. https:\/\/doi.org\/10.1023\/B:VISI.0000029664.99615.94","journal-title":"Int J Comput Vis"},{"key":"11742_CR62","doi-asserted-by":"publisher","unstructured":"Merler M, Galleguillos C, Belongie S (2007) Recognizing groceries in situ using in vitro training data. In: 2007 IEEE conference on computer vision and pattern recognition. IEEE, Minneapolis, pp 1\u20138. https:\/\/doi.org\/10.1109\/CVPR.2007.383486","DOI":"10.1109\/CVPR.2007.383486"},{"key":"11742_CR63","unstructured":"Michaelis C, Ustyuzhaninov I, Bethge M et\u00a0al (2019) One-shot instance segmentation. arXiv:1811.11507 [cs]"},{"issue":"6","key":"11742_CR64","first-page":"1","volume":"3","author":"E Million","year":"2007","unstructured":"Million E (2007) The hadamard product. Course Notes 3(6):1\u20137","journal-title":"Course Notes"},{"key":"11742_CR65","doi-asserted-by":"publisher","unstructured":"Minderer M, Gritsenko A, Stone A et\u00a0al (2022) Simple open-vocabulary object detection. In: Avidan S, Brostow G, Ciss\u00e9 M, et\u00a0al (eds) Computer vision\u2014ECCV 2022. Springer, Cham, pp 728\u2013755. https:\/\/doi.org\/10.1007\/978-3-031-20080-9_42","DOI":"10.1007\/978-3-031-20080-9_42"},{"key":"11742_CR66","doi-asserted-by":"publisher","unstructured":"Osokin A, Sumin D, Lomakin V (2020) OS2D: one-stage one-shot object detection by matching anchor features. In: Vedaldi A, Bischof H, Brox T, et\u00a0al (eds) Computer vision\u2014ECCV 2020. Lecture notes in computer science. Springer, Cham, pp 635\u2013652. https:\/\/doi.org\/10.1007\/978-3-030-58555-6_38","DOI":"10.1007\/978-3-030-58555-6_38"},{"key":"11742_CR67","doi-asserted-by":"publisher","unstructured":"Pang J, Qiu L, Li X et\u00a0al (2021) Quasi-dense similarity learning for multiple object tracking. In: 2021 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 164\u2013173. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00023","DOI":"10.1109\/CVPR46437.2021.00023"},{"key":"11742_CR68","unstructured":"Peng J, Xiao C, Li Y (2021) RP2K: a large-scale retail product dataset for fine-grained image classification. arXiv:2006.12634 [cs]"},{"issue":"8","key":"11742_CR69","doi-asserted-by":"publisher","first-page":"573","DOI":"10.1016\/j.imavis.2012.06.004","volume":"30","author":"P Piccinini","year":"2012","unstructured":"Piccinini P, Prati A, Cucchiara R (2012) Real-time object detection and localization with SIFT-based clustering. Image Vis Comput 30(8):573\u2013587. https:\/\/doi.org\/10.1016\/j.imavis.2012.06.004","journal-title":"Image Vis Comput"},{"key":"11742_CR70","doi-asserted-by":"publisher","unstructured":"Redmon J, Farhadi A (2017) YOLO9000: better, faster, stronger. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), pp 6517\u20136525. https:\/\/doi.org\/10.1109\/CVPR.2017.690","DOI":"10.1109\/CVPR.2017.690"},{"key":"11742_CR71","unstructured":"Redmon J, Farhadi A (2018) YOLOv3: an incremental improvement. arXiv:1804.02767 [cs]"},{"key":"11742_CR72","doi-asserted-by":"publisher","unstructured":"Redmon J, Divvala S, Girshick R et\u00a0al (2016) You only look once: unified, real-time object detection. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 779\u2013788. https:\/\/doi.org\/10.1109\/CVPR.2016.91","DOI":"10.1109\/CVPR.2016.91"},{"key":"11742_CR73","unstructured":"Ren S, He K, Girshick R et\u00a0al (2015) Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in neural information processing systems, vol\u00a028. Curran Associates, Inc"},{"key":"11742_CR74","doi-asserted-by":"crossref","unstructured":"Rocco I, Arandjelovic R, Sivic J (2017) Convolutional neural network architecture for geometric matching, pp 6148\u20136157","DOI":"10.1109\/CVPR.2017.12"},{"key":"11742_CR75","doi-asserted-by":"publisher","unstructured":"Rublee E, Rabaud V, Konolige K et\u00a0al (2011) ORB: an efficient alternative to SIFT or SURF. In: 2011 international conference on computer vision, pp 2564\u20132571. https:\/\/doi.org\/10.1109\/ICCV.2011.6126544","DOI":"10.1109\/ICCV.2011.6126544"},{"key":"11742_CR76","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108257","volume":"121","author":"B Santra","year":"2022","unstructured":"Santra B, Shaw AK, Mukherjee DP (2022) Part-based annotation-free fine-grained classification of images of retail products. Pattern Recognit 121:108257. https:\/\/doi.org\/10.1016\/j.patcog.2021.108257","journal-title":"Pattern Recognit"},{"key":"11742_CR77","doi-asserted-by":"publisher","unstructured":"Sedgh\u00a0Gooya E, Al\u00a0Falou A, Kaddah W (2020) Robust and discriminating face recognition system based on a neural network and correlation techniques. In: 2020 tenth international conference on image processing theory, tools and applications (IPTA), pp 1\u20135. https:\/\/doi.org\/10.1109\/IPTA50016.2020.9286617","DOI":"10.1109\/IPTA50016.2020.9286617"},{"key":"11742_CR78","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556 [cs]"},{"key":"11742_CR79","unstructured":"Snell J, Swersky K, Zemel R (2017) Prototypical networks for few-shot learning. In: Advances in neural information processing systems, vol\u00a030. Curran Associates, Inc"},{"key":"11742_CR80","unstructured":"Socher R, Ganjoo M, Manning CD et\u00a0al (2013) Zero-shot learning through cross-modal transfer. In: Advances in neural information processing systems, vol\u00a026. Curran Associates, Inc"},{"key":"11742_CR81","unstructured":"Song H, Sun D, Chun S et\u00a0al (2021) ViDT: an efficient and effective fully transformer-based object detector. arXiv:2110.03921 [cs]"},{"key":"11742_CR82","doi-asserted-by":"publisher","unstructured":"Sung F, Yang Y, Zhang L et\u00a0al (2018) Learning to compare: relation network for few-shot learning. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 1199\u20131208. https:\/\/doi.org\/10.1109\/CVPR.2018.00131","DOI":"10.1109\/CVPR.2018.00131"},{"key":"11742_CR83","doi-asserted-by":"publisher","unstructured":"Tian Z, Shen C, Chen H et\u00a0al (2019) FCOS: fully convolutional one-stage object detection. In: 2019 IEEE\/CVF international conference on computer vision (ICCV), pp 9626\u20139635. https:\/\/doi.org\/10.1109\/ICCV.2019.00972","DOI":"10.1109\/ICCV.2019.00972"},{"key":"11742_CR84","doi-asserted-by":"publisher","unstructured":"Tonioni A, Di\u00a0Stefano L (2017) Product recognition in store shelves as a sub-graph isomorphism problem. In: Battiato S, Gallo G, Schettini R, et\u00a0al (eds) Image analysis and processing\u2014ICIAP 2017. Lecture Notes in Computer Science. Springer, Cham, pp 682\u2013693. https:\/\/doi.org\/10.1007\/978-3-319-68560-1_61","DOI":"10.1007\/978-3-319-68560-1_61"},{"key":"11742_CR85","unstructured":"Touvron H, Cord M, Douze M et\u00a0al (2021) Training data-efficient image transformers & distillation through attention. arXiv:2012.12877 [cs]"},{"key":"11742_CR86","unstructured":"Vaswani A, Shazeer N, Parmar N et\u00a0al (2017) Attention is all you need. In: Advances in neural information processing systems, vol\u00a030. Curran Associates, Inc"},{"key":"11742_CR87","doi-asserted-by":"crossref","unstructured":"Wang CY, Bochkovskiy A, Liao HYM (2022) YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv:2207.02696 [cs]","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"11742_CR88","doi-asserted-by":"crossref","unstructured":"Wang CY, Yeh IH, Liao HYM (2024) YOLOv9: learning what you want to learn using programmable gradient information. arXiv:2402.13616 [cs]","DOI":"10.1007\/978-3-031-72751-1_1"},{"issue":"18","key":"11742_CR89","doi-asserted-by":"publisher","first-page":"14613","DOI":"10.1007\/s00521-020-05148-3","volume":"32","author":"W Wang","year":"2020","unstructured":"Wang W, Cui Y, Li G et al (2020) A self-attention-based destruction and construction learning fine-grained image classification method for retail product recognition. Neural Comput Appl 32(18):14613\u201314622. https:\/\/doi.org\/10.1007\/s00521-020-05148-3","journal-title":"Neural Comput Appl"},{"key":"11742_CR90","doi-asserted-by":"publisher","unstructured":"Wang X, Girshick R, Gupta A et\u00a0al (2018) Non-local neural networks. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition. IEEE, Salt Lake City, pp 7794\u20137803. https:\/\/doi.org\/10.1109\/CVPR.2018.00813","DOI":"10.1109\/CVPR.2018.00813"},{"key":"11742_CR91","unstructured":"Wang X, Huang TE, Darrell T et\u00a0al (2020) Frustratingly simple few-shot object detection. arXiv:2003.06957 [cs]"},{"key":"11742_CR92","unstructured":"Wang Y, Wu XM, Li Q et\u00a0al (2018) Large margin few-shot learning. arXiv:abs\/1807.02872"},{"key":"11742_CR93","doi-asserted-by":"crossref","unstructured":"Wang Z, Yang B, Yue H et\u00a0al (2024) Fine-grained prototypes distillation for few-shot object detection. arXiv:2401.07629 [cs]","DOI":"10.1609\/aaai.v38i6.28399"},{"key":"11742_CR94","unstructured":"Wei XS, Cui Q, Yang L et\u00a0al (2019) RPC: a large-scale retail product checkout dataset. arXiv:1901.07249 [cs]"},{"key":"11742_CR95","doi-asserted-by":"publisher","DOI":"10.1155\/2020\/8875910","volume":"2020","author":"Y Wei","year":"2020","unstructured":"Wei Y, Tran S, Xu S et al (2020) Deep learning for retail product recognition: challenges and techniques. Comput Intell Neurosci 2020:e8875910. https:\/\/doi.org\/10.1155\/2020\/8875910","journal-title":"Comput Intell Neurosci"},{"key":"11742_CR96","doi-asserted-by":"publisher","unstructured":"Wu X, Sahoo D, Hoi S (2020) Meta-RCNN: meta learning for few-shot object detection. In: Proceedings of the 28th ACM international conference on multimedia. Association for Computing Machinery, New York, MM \u201920, pp 1679\u20131687. https:\/\/doi.org\/10.1145\/3394171.3413832","DOI":"10.1145\/3394171.3413832"},{"key":"11742_CR97","doi-asserted-by":"crossref","unstructured":"Xian Y, Schiele B, Akata Z (2017) Zero-shot learning\u2014the good, the bad and the ugly, pp 4582\u20134591","DOI":"10.1109\/CVPR.2017.328"},{"key":"11742_CR98","unstructured":"Xiao H, Rasul K, Vollgraf R (2017) Fashion-MNIST: a novel image dataset for benchmarking machine learning algorithms. arXiv:1708.07747 [cs, stat]"},{"key":"11742_CR99","doi-asserted-by":"crossref","unstructured":"Xiao Y, Lepetit V, Marlet R (2022) Few-shot object detection and viewpoint estimation for objects in the wild. arXiv:2007.12107 [cs]","DOI":"10.1109\/TPAMI.2022.3174072"},{"key":"11742_CR100","unstructured":"Xu Y, Zhang M, Fu C et\u00a0al (2023) Multi-modal queried object detection in the wild. arXiv:2305.18980 [cs]"},{"key":"11742_CR101","doi-asserted-by":"publisher","unstructured":"Yan X, Chen Z, Xu A et\u00a0al (2019) Meta R-CNN: towards general solver for instance-level low-shot learning. In: 2019 IEEE\/CVF international conference on computer vision (ICCV), pp 9576\u20139585. https:\/\/doi.org\/10.1109\/ICCV.2019.00967","DOI":"10.1109\/ICCV.2019.00967"},{"key":"11742_CR102","doi-asserted-by":"publisher","first-page":"390","DOI":"10.1016\/j.neucom.2021.04.116","volume":"455","author":"H Yang","year":"2021","unstructured":"Yang H, Lin Y, Zhang H et al (2021) Towards improving classification power for one-shot object detection. Neurocomputing 455:390\u2013400. https:\/\/doi.org\/10.1016\/j.neucom.2021.04.116","journal-title":"Neurocomputing"},{"issue":"3","key":"11742_CR103","doi-asserted-by":"publisher","first-page":"242","DOI":"10.18178\/ijmlc.2021.11.3.1042","volume":"11","author":"JX Yang","year":"2021","unstructured":"Yang JX, Li LD et al (2021) Warehouse management models using artificial intelligence technology with application at receiving stage\u2014a review. Int J Mach Learn Comput 11(3):242\u2013249. https:\/\/doi.org\/10.18178\/ijmlc.2021.11.3.1042","journal-title":"Int J Mach Learn Comput"},{"key":"11742_CR104","doi-asserted-by":"publisher","unstructured":"Yang H, Cai S, Sheng H et\u00a0al (2022) Balanced and hierarchical relation learning for one-shot object detection. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 7581\u20137590. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00744","DOI":"10.1109\/CVPR52688.2022.00744"},{"key":"11742_CR105","doi-asserted-by":"crossref","unstructured":"Zhang G, Luo Z, Cui K et\u00a0al (2021) Meta-DETR: image-level few-shot object detection with inter-class correlation exploitation. arXiv:2103.11731 [cs]","DOI":"10.1109\/TPAMI.2022.3195735"},{"key":"11742_CR106","unstructured":"Zhang H, Li F, Liu S et\u00a0al (2022) DINO: DETR with improved denoising anchor boxes for end-to-end object detection. arXiv:2203.03605 [cs]"},{"key":"11742_CR107","doi-asserted-by":"publisher","unstructured":"Zhang L, Zhou S, Guan J et\u00a0al (2021) Accurate few-shot object detection with support-query mutual guidance and hybrid loss. In: 2021 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 14419\u201314427. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01419","DOI":"10.1109\/CVPR46437.2021.01419"},{"key":"11742_CR108","unstructured":"Zhang T, Zhang Y, Sun X et\u00a0al (2020) Comparison network for one-shot conditional object detection. arXiv:1904.02317 [cs]"},{"key":"11742_CR109","unstructured":"Zhang X, Wang Y, Boularias A (2024) Detect everything with few examples. arXiv:2309.12969 [cs]"},{"key":"11742_CR110","doi-asserted-by":"publisher","unstructured":"Zhao Y, Guo X, Lu Y (2022) Semantic-aligned fusion transformer for one-shot object detection. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 7591\u20137601. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00745","DOI":"10.1109\/CVPR52688.2022.00745"},{"key":"11742_CR111","unstructured":"Zhu X, Su W, Lu L et\u00a0al (2021) Deformable DETR: deformable transformers for end-to-end object detection. arXiv:2010.04159 [cs]"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-025-11742-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-025-11742-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-025-11742-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,23]],"date-time":"2025-04-23T16:58:25Z","timestamp":1745427505000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-025-11742-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,18]]},"references-count":111,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2025,4]]}},"alternative-id":["11742"],"URL":"https:\/\/doi.org\/10.1007\/s11063-025-11742-0","relation":{},"ISSN":["1573-773X"],"issn-type":[{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,18]]},"assertion":[{"value":"24 February 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 March 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"30"}}