{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T09:32:58Z","timestamp":1777887178415,"version":"3.51.4"},"reference-count":47,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100010418","name":"Institute for Information and Communications Technology Promotion","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010418","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100014188","name":"Ministry of Science and ICT, South Korea","doi-asserted-by":"publisher","award":["RS-2020-II201373"],"award-info":[{"award-number":["RS-2020-II201373"]}],"id":[{"id":"10.13039\/501100014188","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Vision and Image Understanding"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1016\/j.cviu.2026.104684","type":"journal-article","created":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T16:19:53Z","timestamp":1771431593000},"page":"104684","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["ADuLTS: Appearance Descriptions under Long-Tailed Scenarios with diverse synthesized images"],"prefix":"10.1016","volume":"265","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1941-0691","authenticated-orcid":false,"given":"SeungJu","family":"Cha","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seunghee","family":"Choi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4525-871X","authenticated-orcid":false,"given":"Kwanyoung","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7231-7494","authenticated-orcid":false,"given":"Dong-Jin","family":"Kim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.cviu.2026.104684_b1","doi-asserted-by":"crossref","unstructured":"Brooks,\u00a0T., Holynski,\u00a0A., Efros,\u00a0A.A., 2023. Instructpix2pix: Learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 18392\u201318402.","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"10.1016\/j.cviu.2026.104684_b2","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.cviu.2026.104684_b3","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1016\/j.neunet.2018.07.011","article-title":"A systematic study of the class imbalance problem in convolutional neural networks","volume":"106","author":"Buda","year":"2018","journal-title":"Neural Netw."},{"key":"10.1016\/j.cviu.2026.104684_b4","series-title":"International Conference on Machine Learning","first-page":"872","article-title":"What is the effect of importance weighting in deep learning?","author":"Byrd","year":"2019"},{"key":"10.1016\/j.cviu.2026.104684_b5","article-title":"Learning imbalanced datasets with label-distribution-aware margin loss","volume":"32","author":"Cao","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.cviu.2026.104684_b6","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1613\/jair.953","article-title":"SMOTE: synthetic minority over-sampling technique","volume":"16","author":"Chawla","year":"2002","journal-title":"J. Artificial Intelligence Res."},{"key":"10.1016\/j.cviu.2026.104684_b7","series-title":"International Conference on Machine Learning","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","author":"Chen","year":"2020"},{"key":"10.1016\/j.cviu.2026.104684_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.109270","article-title":"Hybrid routing transformer for zero-shot learning","volume":"137","author":"Cheng","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.cviu.2026.104684_b9","doi-asserted-by":"crossref","unstructured":"Cui,\u00a0Y., Jia,\u00a0M., Lin,\u00a0T.-Y., Song,\u00a0Y., Belongie,\u00a0S., 2019. Class-balanced loss based on effective number of samples. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 9268\u20139277.","DOI":"10.1109\/CVPR.2019.00949"},{"key":"10.1016\/j.cviu.2026.104684_b10","doi-asserted-by":"crossref","unstructured":"Cui,\u00a0J., Zhong,\u00a0Z., Liu,\u00a0S., Yu,\u00a0B., Jia,\u00a0J., 2021. Parametric contrastive learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 715\u2013724.","DOI":"10.1109\/ICCV48922.2021.00075"},{"key":"10.1016\/j.cviu.2026.104684_b11","series-title":"2009 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"248","article-title":"Imagenet: A large-scale hierarchical image database","author":"Deng","year":"2009"},{"key":"10.1016\/j.cviu.2026.104684_b12","unstructured":"Drummond,\u00a0C., Holte,\u00a0R.C., et al., 2003. C4. 5, class imbalance, and cost sensitivity: why under-sampling beats over-sampling. In: Workshop on Learning from Imbalanced Datasets II. Vol. 11, pp. 1\u20138."},{"key":"10.1016\/j.cviu.2026.104684_b13","series-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion","author":"Gal","year":"2022"},{"key":"10.1016\/j.cviu.2026.104684_b14","doi-asserted-by":"crossref","first-page":"60329","DOI":"10.52202\/075280-2636","article-title":"Enhancing minority classes by mixing: An adaptative optimal transport approach for long-tailed classification","volume":"36","author":"Gao","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.cviu.2026.104684_b15","series-title":"How much data are augmentations worth? an investigation into scaling laws, invariance, and implicit regularization","author":"Geiping","year":"2022"},{"key":"10.1016\/j.cviu.2026.104684_b16","doi-asserted-by":"crossref","unstructured":"He,\u00a0K., Zhang,\u00a0X., Ren,\u00a0S., Sun,\u00a0J., 2016. Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"10.1016\/j.cviu.2026.104684_b17","series-title":"Promptcap: Prompt-guided task-aware image captioning","author":"Hu","year":"2022"},{"key":"10.1016\/j.cviu.2026.104684_b18","series-title":"Decoupling representation and classifier for long-tailed recognition","author":"Kang","year":"2019"},{"key":"10.1016\/j.cviu.2026.104684_b19","first-page":"18661","article-title":"Supervised contrastive learning","volume":"33","author":"Khosla","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.cviu.2026.104684_b20","doi-asserted-by":"crossref","unstructured":"Li,\u00a0H., Gu,\u00a0J., Koner,\u00a0R., Sharifzadeh,\u00a0S., Tresp,\u00a0V., 2023. Do DALL-E and Flamingo Understand Each Other?. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 1999\u20132010.","DOI":"10.1109\/ICCV51070.2023.00191"},{"key":"10.1016\/j.cviu.2026.104684_b21","doi-asserted-by":"crossref","unstructured":"Li,\u00a0S., Xia,\u00a0X., Ge,\u00a0S., Liu,\u00a0T., 2022. Selective-supervised contrastive learning with noisy labels. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 316\u2013325.","DOI":"10.1109\/CVPR52688.2022.00041"},{"key":"10.1016\/j.cviu.2026.104684_b22","doi-asserted-by":"crossref","unstructured":"Lin,\u00a0T.-Y., Goyal,\u00a0P., Girshick,\u00a0R., He,\u00a0K., Doll\u00e1r,\u00a0P., 2017. Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 2980\u20132988.","DOI":"10.1109\/ICCV.2017.324"},{"key":"10.1016\/j.cviu.2026.104684_b23","series-title":"Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.cviu.2026.104684_b24","article-title":"Capsule networks with residual pose routing","author":"Liu","year":"2024","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.cviu.2026.104684_b25","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109971","article-title":"LCReg: Long-tailed image classification with Latent Categories based Recognition","volume":"145","author":"Liu","year":"2024","journal-title":"Pattern Recognit."},{"issue":"7","key":"10.1016\/j.cviu.2026.104684_b26","first-page":"3688","article-title":"Part-object relational visual saliency","volume":"44","author":"Liu","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.cviu.2026.104684_b27","doi-asserted-by":"crossref","unstructured":"Mullick,\u00a0S.S., Datta,\u00a0S., Das,\u00a0S., 2019. Generative adversarial minority oversampling. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 1695\u20131704.","DOI":"10.1109\/ICCV.2019.00178"},{"key":"10.1016\/j.cviu.2026.104684_b28","doi-asserted-by":"crossref","unstructured":"Park,\u00a0S., Hong,\u00a0Y., Heo,\u00a0B., Yun,\u00a0S., Choi,\u00a0J.Y., 2022. The majority can help the minority: Context-rich minority oversampling for long-tailed classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 6887\u20136896.","DOI":"10.1109\/CVPR52688.2022.00676"},{"key":"10.1016\/j.cviu.2026.104684_b29","series-title":"2018 IEEE Conference on Multimedia Information Processing and Retrieval","first-page":"112","article-title":"Dynamic sampling in convolutional neural networks for imbalanced data classification","author":"Pouyanfar","year":"2018"},{"key":"10.1016\/j.cviu.2026.104684_b30","doi-asserted-by":"crossref","unstructured":"Pratt,\u00a0S., Covert,\u00a0I., Liu,\u00a0R., Farhadi,\u00a0A., 2023. What does a platypus look like? generating customized prompts for zero-shot image classification. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 15691\u201315701.","DOI":"10.1109\/ICCV51070.2023.01438"},{"key":"10.1016\/j.cviu.2026.104684_b31","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.cviu.2026.104684_b32","unstructured":"Ravi,\u00a0S., Larochelle,\u00a0H., 2017. Optimization as a model for few-shot learning. In: International Conference on Learning Representations."},{"key":"10.1016\/j.cviu.2026.104684_b33","first-page":"4175","article-title":"Balanced meta-softmax for long-tailed visual recognition","volume":"33","author":"Ren","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.cviu.2026.104684_b34","doi-asserted-by":"crossref","unstructured":"Rombach,\u00a0R., Blattmann,\u00a0A., Lorenz,\u00a0D., Esser,\u00a0P., Ommer,\u00a0B., 2022. High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 10684\u201310695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"10.1016\/j.cviu.2026.104684_b35","doi-asserted-by":"crossref","unstructured":"Samuel,\u00a0D., Chechik,\u00a0G., 2021. Distributional robustness loss for long-tail learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 9495\u20139504.","DOI":"10.1109\/ICCV48922.2021.00936"},{"key":"10.1016\/j.cviu.2026.104684_b36","doi-asserted-by":"crossref","unstructured":"Sarafianos,\u00a0N., Xu,\u00a0X., Kakadiaris,\u00a0I.A., 2018. Deep imbalanced attribute classification using visual attention aggregation. In: Proceedings of the European Conference on Computer Vision. ECCV, pp. 680\u2013697.","DOI":"10.1007\/978-3-030-01252-6_42"},{"key":"10.1016\/j.cviu.2026.104684_b37","doi-asserted-by":"crossref","unstructured":"Sar\u0131y\u0131ld\u0131z,\u00a0M.B., Alahari,\u00a0K., Larlus,\u00a0D., Kalantidis,\u00a0Y., 2023. Fake it till you make it: Learning transferable representations from synthetic ImageNet clones. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 8011\u20138021.","DOI":"10.1109\/CVPR52729.2023.00774"},{"key":"10.1016\/j.cviu.2026.104684_b38","series-title":"Fill-up: Balancing long-tailed data with generative models","author":"Shin","year":"2023"},{"key":"10.1016\/j.cviu.2026.104684_b39","doi-asserted-by":"crossref","unstructured":"Van\u00a0Horn,\u00a0G., Mac\u00a0Aodha,\u00a0O., Song,\u00a0Y., Cui,\u00a0Y., Sun,\u00a0C., Shepard,\u00a0A., Adam,\u00a0H., Perona,\u00a0P., Belongie,\u00a0S., 2018. The inaturalist species classification and detection dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 8769\u20138778.","DOI":"10.1109\/CVPR.2018.00914"},{"key":"10.1016\/j.cviu.2026.104684_b40","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hulse,\u00a0J., Khoshgoftaar,\u00a0T.M., Napolitano,\u00a0A., 2007. Experimental perspectives on learning from imbalanced data. In: Proceedings of the 24th International Conference on Machine Learning. pp. 935\u2013942.","DOI":"10.1145\/1273496.1273614"},{"key":"10.1016\/j.cviu.2026.104684_b41","article-title":"Matching networks for one shot learning","volume":"29","author":"Vinyals","year":"2016","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.cviu.2026.104684_b42","doi-asserted-by":"crossref","unstructured":"Wang,\u00a0P., Han,\u00a0K., Wei,\u00a0X.-S., Zhang,\u00a0L., Wang,\u00a0L., 2021. Contrastive learning based hybrid networks for long-tailed image classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 943\u2013952.","DOI":"10.1109\/CVPR46437.2021.00100"},{"key":"10.1016\/j.cviu.2026.104684_b43","series-title":"Real-fake: Effective training data synthesis through distribution matching","author":"Yuan","year":"2023"},{"key":"10.1016\/j.cviu.2026.104684_b44","doi-asserted-by":"crossref","unstructured":"Yun,\u00a0S., Han,\u00a0D., Oh,\u00a0S.J., Chun,\u00a0S., Choe,\u00a0J., Yoo,\u00a0Y., 2019. Cutmix: Regularization strategy to train strong classifiers with localizable features. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 6023\u20136032.","DOI":"10.1109\/ICCV.2019.00612"},{"key":"10.1016\/j.cviu.2026.104684_b45","series-title":"Mixup: Beyond empirical risk minimization","author":"Zhang","year":"2017"},{"key":"10.1016\/j.cviu.2026.104684_b46","doi-asserted-by":"crossref","unstructured":"Zhou,\u00a0B., Cui,\u00a0Q., Wei,\u00a0X.-S., Chen,\u00a0Z.-M., 2020. Bbn: Bilateral-branch network with cumulative learning for long-tailed visual recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 9719\u20139728.","DOI":"10.1109\/CVPR42600.2020.00974"},{"key":"10.1016\/j.cviu.2026.104684_b47","doi-asserted-by":"crossref","unstructured":"Zhu,\u00a0J., Wang,\u00a0Z., Chen,\u00a0J., Chen,\u00a0Y.-P.P., Jiang,\u00a0Y.-G., 2022. Balanced contrastive learning for long-tailed visual recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 6908\u20136917.","DOI":"10.1109\/CVPR52688.2022.00678"}],"container-title":["Computer Vision and Image Understanding"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1077314226000512?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1077314226000512?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T04:03:34Z","timestamp":1777608214000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1077314226000512"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":47,"alternative-id":["S1077314226000512"],"URL":"https:\/\/doi.org\/10.1016\/j.cviu.2026.104684","relation":{},"ISSN":["1077-3142"],"issn-type":[{"value":"1077-3142","type":"print"}],"subject":[],"published":{"date-parts":[[2026,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"ADuLTS: Appearance Descriptions under Long-Tailed Scenarios with diverse synthesized images","name":"articletitle","label":"Article Title"},{"value":"Computer Vision and Image Understanding","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.cviu.2026.104684","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Inc. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"104684"}}