{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T16:24:25Z","timestamp":1775924665709,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2022,8,20]],"date-time":"2022-08-20T00:00:00Z","timestamp":1660953600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,8,20]],"date-time":"2022-08-20T00:00:00Z","timestamp":1660953600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1007\/s11042-022-13424-8","type":"journal-article","created":{"date-parts":[[2022,8,20]],"date-time":"2022-08-20T15:02:59Z","timestamp":1661007779000},"page":"7383-7400","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Improving apparel detection with category grouping and multi-grained branches"],"prefix":"10.1007","volume":"82","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1808-2887","authenticated-orcid":false,"given":"Qing","family":"Tian","sequence":"first","affiliation":[]},{"given":"Sampath","family":"Chanda","sequence":"additional","affiliation":[]},{"given":"Amit Kumar K","family":"C","sequence":"additional","affiliation":[]},{"given":"Douglas","family":"Gray","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,20]]},"reference":[{"key":"13424_CR1","unstructured":"An implementation of YOLOv4 in PyTorch. https:\/\/github.com\/bubbliiiing\/yolov4-pytorch. Accessed 14 March 2022"},{"key":"13424_CR2","unstructured":"Bochkovskiy A, Wang C Y, Liao H Y M (2020) Yolov4: Optimal speed and accuracy of object detection. arXiv:2004.10934"},{"key":"13424_CR3","doi-asserted-by":"crossref","unstructured":"Cai Z, Vasconcelos N (2018) Cascade r-cnn: delving into high quality object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6154\u20136162","DOI":"10.1109\/CVPR.2018.00644"},{"key":"13424_CR4","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla N V, Bowyer K W, Hall L O, Kegelmeyer W P (2002) Smote: synthetic minority over-sampling technique. J Artif Intell Res 16:321\u2013357","journal-title":"J Artif Intell Res"},{"key":"13424_CR5","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L. -J, Li K, Fei-fei L (2009) ImageNet: a large-scale hierarchical image database. In: CVPR09","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"13424_CR6","doi-asserted-by":"crossref","unstructured":"Deng J, Krause J, Berg A C, Fei-Fei L (2012) Hedging your bets: optimizing accuracy-specificity trade-offs in large scale visual recognition. In: 2012 IEEE Conference on computer vision and pattern recognition, pp 3450\u20133457. IEEE","DOI":"10.1109\/CVPR.2012.6248086"},{"key":"13424_CR7","doi-asserted-by":"crossref","unstructured":"Ge Y, Zhang R, Wang X, Tang X, Luo P (2019) Deepfashion2: a versatile benchmark for detection, pose estimation, segmentation and re-identification of clothing images. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5337\u20135345","DOI":"10.1109\/CVPR.2019.00548"},{"key":"13424_CR8","doi-asserted-by":"crossref","unstructured":"Gidaris S, Komodakis N (2015) Object detection via a multi-region and semantic segmentation-aware cnn model. In: Proceedings of the IEEE international conference on computer vision, pp 1134\u20131142","DOI":"10.1109\/ICCV.2015.135"},{"key":"13424_CR9","doi-asserted-by":"crossref","unstructured":"Gidaris S, Komodakis N (2016) Attend refine repeat: active box proposal generation via in-out localization. arXiv:1606.04446","DOI":"10.5244\/C.30.90"},{"key":"13424_CR10","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"key":"13424_CR11","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"key":"13424_CR12","doi-asserted-by":"crossref","unstructured":"Hara K, Jagadeesh V, Piramuthu R (2016) Fashion apparel detection: the role of deep convolutional neural network and pose-dependent priors. In: 2016 IEEE Winter conference on applications of computer vision (WACV). IEEE, pp 1\u20139","DOI":"10.1109\/WACV.2016.7477611"},{"issue":"9","key":"13424_CR13","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He H, Garcia E A (2009) Learning from imbalanced data. IEEE Trans Knowl Data Eng 21(9):1263\u20131284","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"13424_CR14","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"13424_CR15","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp 2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"key":"13424_CR16","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der Maaten L, Weinberger K Q (2017) Densely connected convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"13424_CR17","doi-asserted-by":"publisher","first-page":"128837","DOI":"10.1109\/ACCESS.2019.2939201","volume":"7","author":"L Jiao","year":"2019","unstructured":"Jiao L, Zhang F, Liu F, Yang S, Li L, Feng Z, Qu R (2019) A survey of deep learning-based object detection. IEEE Access 7:128837\u2013128868","journal-title":"IEEE Access"},{"issue":"8","key":"13424_CR18","doi-asserted-by":"crossref","first-page":"3573","DOI":"10.1109\/TNNLS.2017.2732482","volume":"29","author":"SH Khan","year":"2017","unstructured":"Khan S H, Hayat M, Bennamoun M, Sohel F A, Togneri R (2017) Cost-sensitive learning of deep feature representations from imbalanced data. IEEE Trans Neural Netw Learn Syst 29(8):3573\u20133587","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"13424_CR19","doi-asserted-by":"publisher","first-page":"554","DOI":"10.1016\/j.asoc.2013.08.014","volume":"14","author":"B Krawczyk","year":"2014","unstructured":"Krawczyk B, Wo\u017aniak M, Schaefer G (2014) Cost-sensitive decision tree ensembles for effective imbalanced classification. Appl Soft Comput 14:554\u2013562","journal-title":"Appl Soft Comput"},{"key":"13424_CR20","unstructured":"Kuznetsova A, Rom H, Alldrin N, Uijlings J, Krasin I, Pont-Tuset J, Kamali S, Popov S, Malloci M, Duerig T et al (2018) The open images dataset v4: unified image classification, object detection, and visual relationship detection at scale. arXiv:1811.00982"},{"key":"13424_CR21","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick C L (2014) Microsoft coco: common objects in context. In: European conference on computer vision, pp 740\u2013755. Springer","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"13424_CR22","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"13424_CR23","doi-asserted-by":"crossref","unstructured":"Lin T -Y, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision, pp 2980\u20132988","DOI":"10.1109\/ICCV.2017.324"},{"key":"13424_CR24","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y, Berg A C (2016) Ssd: single shot multibox detector. In: European conference on computer vision. Springer, pp 21\u201337","DOI":"10.1007\/978-3-319-46448-0_2"},{"issue":"4","key":"13424_CR25","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1093\/ijl\/3.4.235","volume":"3","author":"GA Miller","year":"1990","unstructured":"Miller G A, Beckwith R, Fellbaum C, Gross D, Miller K J (1990) Introduction to wordnet: an on-line lexical database. Int J Lexicogr 3 (4):235\u2013244","journal-title":"Int J Lexicogr"},{"key":"13424_CR26","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"key":"13424_CR27","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7263\u20137271","DOI":"10.1109\/CVPR.2017.690"},{"key":"13424_CR28","unstructured":"Redmon J, Farhadi A (2018) Yolov3: an incremental improvement. arXiv:1804.02767"},{"key":"13424_CR29","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: towards real-time object detection with region proposal networks. In: Advances in neural information processing systems, pp 91\u201399"},{"key":"13424_CR30","unstructured":"Sermanet P, Eigen D, Zhang X, Mathieu M, Fergus R, LeCun Y (2013) Overfeat: integrated recognition, localization and detection using convolutional networks. arXiv:1312.6229"},{"key":"13424_CR31","unstructured":"Tan M, Le Q (2019) Efficientnet: rethinking model scaling for convolutional neural networks. In: International conference on machine learning, pp 6105\u20136114. PMLR"},{"key":"13424_CR32","doi-asserted-by":"crossref","unstructured":"Tan M, Pang R, Le Q V (2020) Efficientdet: scalable and efficient object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10781\u201310790","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"13424_CR33","doi-asserted-by":"crossref","unstructured":"Ting K M (2000) A comparative study of cost-sensitive boosting algorithms. In: Proceedings of the 17th international conference on machine learning. Citeseer","DOI":"10.1007\/3-540-45164-1_42"},{"issue":"2","key":"13424_CR34","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JR Uijlings","year":"2013","unstructured":"Uijlings J R, Van De Sande K E, Gevers T, Smeulders A W (2013) Selective search for object recognition. Int J Comput Vis 104(2):154\u2013171","journal-title":"Int J Comput Vis"},{"key":"13424_CR35","doi-asserted-by":"crossref","unstructured":"Wang S, Liu W, Wu J, Cao L, Meng Q, Kennedy P J (2016) Training deep neural networks on imbalanced data sets. In: 2016 International joint conference on neural networks (IJCNN). IEEE, pp 4368\u20134374","DOI":"10.1109\/IJCNN.2016.7727770"},{"key":"13424_CR36","unstructured":"Wang Y-X, Ramanan D, Hebert M (2017) Learning to model the tail. In: Advances in neural information processing systems. Accessed 14 March 2022, pp 7029\u20137039"},{"key":"13424_CR37","unstructured":"Wu Y, Kirillov A, Massa F, Lo W-Y, Girshick R (2019) Detectron2 https:\/\/github.com\/facebookresearch\/detectron2"},{"key":"13424_CR38","doi-asserted-by":"crossref","unstructured":"Wu J, Song L, Wang T, Zhang Q, Yuan J (2020) Forest r-cnn: large-vocabulary long-tailed object detection and instance segmentation. In: Proceedings of the 28th ACM international conference on multimedia, pp 1570\u20131578","DOI":"10.1145\/3394171.3413970"},{"key":"13424_CR39","unstructured":"Xu Z, Li B, Yuan Y, Dang A (2020) Beta r-cnn: looking into pedestrian detection from another perspective. Advances in Neural Information Processing Systems"},{"key":"13424_CR40","doi-asserted-by":"crossref","unstructured":"Yang B, Yan J, Lei Z, Li S Z (2016) Craft objects from images. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6043\u20136051","DOI":"10.1109\/CVPR.2016.650"},{"key":"13424_CR41","doi-asserted-by":"crossref","unstructured":"Yang H, Wu H, Chen H (2019) Detecting 11k classes: large scale object detection without fine-grained bounding boxes. In: Proceedings of the IEEE international conference on computer vision, pp 9805\u20139813","DOI":"10.1109\/ICCV.2019.00990"},{"key":"13424_CR42","doi-asserted-by":"crossref","unstructured":"Yu F, Wang D, Shelhamer E, Darrell T (2018) Deep layer aggregation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2403\u20132412","DOI":"10.1109\/CVPR.2018.00255"},{"issue":"1","key":"13424_CR43","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1109\/TKDE.2006.17","volume":"18","author":"Z-H Zhou","year":"2005","unstructured":"Zhou Z-H, Liu X-Y (2005) Training cost-sensitive neural networks with methods addressing the class imbalance problem. IEEE Trans Knowl Data Eng 18(1):63\u201377","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"3","key":"13424_CR44","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1111\/j.1467-8640.2010.00358.x","volume":"26","author":"Z-H Zhou","year":"2010","unstructured":"Zhou Z-H, Liu X-Y (2010) On multi-class cost-sensitive learning. Comput Intell 26(3):232\u2013257","journal-title":"Comput Intell"},{"key":"13424_CR45","doi-asserted-by":"crossref","unstructured":"Zitnick C L, Doll\u00e1r P (2014) Edge boxes: locating object proposals from edges. In: European conference on computer vision. Springer, pp 391\u2013405","DOI":"10.1007\/978-3-319-10602-1_26"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-13424-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-022-13424-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-13424-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T04:38:24Z","timestamp":1727843904000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-022-13424-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,20]]},"references-count":45,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2023,2]]}},"alternative-id":["13424"],"URL":"https:\/\/doi.org\/10.1007\/s11042-022-13424-8","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,20]]},"assertion":[{"value":"17 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 March 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 July 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 August 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Competing interests: the authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Competing interests"}}]}}