{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T21:26:44Z","timestamp":1769635604045,"version":"3.49.0"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T00:00:00Z","timestamp":1736121600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T00:00:00Z","timestamp":1736121600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s00530-024-01624-1","type":"journal-article","created":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T17:34:10Z","timestamp":1736184850000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Long-tailed video recognition via majority-guided diffusion model"],"prefix":"10.1007","volume":"31","author":[{"given":"Yufan","family":"Hu","sequence":"first","affiliation":[]},{"given":"Yi","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Lixin","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,6]]},"reference":[{"key":"1624_CR1","doi-asserted-by":"publisher","first-page":"3567","DOI":"10.1109\/TMM.2023.3312917","volume":"26","author":"T Chen","year":"2024","unstructured":"Chen, T., Guo, Y., Hao, S., Hong, R.: Semi-supervised domain adaptation for major depressive disorder detection. IEEE Trans. Multimedia 26, 3567\u20133579 (2024)","journal-title":"IEEE Trans. Multimedia"},{"key":"1624_CR2","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla, N.V., Bowyer, K.W., Hall, L.O., Kegelmeyer, W.P.: Smote: synthetic minority over-sampling technique. J. Artif. Intell. Res. 16, 321\u2013357 (2002)","journal-title":"J. Artif. Intell. Res."},{"key":"1624_CR3","doi-asserted-by":"crossref","unstructured":"Cui, Y., Jia, M., Lin, T.-Y., Song, Y., Belongie, S.: Class-balanced loss based on effective number of samples. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9268\u20139277 (2019)","DOI":"10.1109\/CVPR.2019.00949"},{"key":"1624_CR4","doi-asserted-by":"crossref","unstructured":"Kim, J., Jeong, J., Shin, J.: M2m: Imbalanced classification via major-to-minor translation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13896\u201313905 (2020)","DOI":"10.1109\/CVPR42600.2020.01391"},{"key":"1624_CR5","doi-asserted-by":"crossref","unstructured":"Park, S., Hong, Y., Heo, B., Yun, S., Choi, J.Y.: The majority can help the minority: Context-rich minority oversampling for long-tailed classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6887\u20136896 (2022)","DOI":"10.1109\/CVPR52688.2022.00676"},{"key":"1624_CR6","doi-asserted-by":"crossref","unstructured":"Zhang, X., Wu, Z., Weng, Z., Fu, H., Chen, J., Jiang, Y.-G., Davis, L.S.: Videolt: Large-scale long-tailed video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7960\u20137969 (2021)","DOI":"10.1109\/ICCV48922.2021.00786"},{"key":"1624_CR7","doi-asserted-by":"crossref","unstructured":"Perrett, T., Sinha, S., Burghardt, T., Mirmehdi, M., Damen, D.: Use your head: Improving long-tail video recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2415\u20132425 (2023)","DOI":"10.1109\/CVPR52729.2023.00239"},{"key":"1624_CR8","doi-asserted-by":"publisher","first-page":"555","DOI":"10.1109\/TMM.2023.3267887","volume":"26","author":"Y Hu","year":"2023","unstructured":"Hu, Y., Gao, J., Xu, C.: Learning multi-expert distribution calibration for long-tailed video classification. IEEE Trans. Multimedia 26, 555\u2013567 (2023)","journal-title":"IEEE Trans. Multimedia"},{"key":"1624_CR9","doi-asserted-by":"crossref","unstructured":"Li, X., Xu, H.: Meid: mixture-of-experts with internal distillation for long-tailed video recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence vol. 37, pp. 1451\u20131459 (2023)","DOI":"10.1609\/aaai.v37i2.25230"},{"key":"1624_CR10","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hulse, J., Khoshgoftaar, T.M., Napolitano, A.: Experimental perspectives on learning from imbalanced data. In: Proceedings of the 24th International Conference on Machine Learning, pp. 935\u2013942 (2007)","DOI":"10.1145\/1273496.1273614"},{"key":"1624_CR11","doi-asserted-by":"crossref","unstructured":"Pan, H., Guo, Y., Yu, M., Chen, J.: Enhanced long-tailed recognition with contrastive cutmix augmentation. IEEE Trans. Image Process. (2024)","DOI":"10.1109\/TIP.2024.3425148"},{"issue":"47","key":"1624_CR12","first-page":"1","volume":"23","author":"J Ho","year":"2022","unstructured":"Ho, J., Saharia, C., Chan, W., Fleet, D.J., Norouzi, M., Salimans, T.: Cascaded diffusion models for high fidelity image generation. J. Mach. Learn. Res. 23(47), 1\u201333 (2022)","journal-title":"J. Mach. Learn. Res."},{"key":"1624_CR13","doi-asserted-by":"crossref","unstructured":"Blattmann, A., Rombach, R., Ling, H., Dockhorn, T., Kim, S.W., Fidler, S., Kreis, K.: Align your latents: High-resolution video synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22563\u201322575 (2023)","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"1624_CR14","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"1624_CR15","unstructured":"Drummond, C., Holte, R.C., et al.: C4. 5, class imbalance, and cost sensitivity: why under-sampling beats over-sampling. In: Workshop on Learning from Imbalanced Datasets II, vol. 11 (2003)"},{"key":"1624_CR16","doi-asserted-by":"crossref","unstructured":"Wallace, B.C., Small, K., Brodley, C.E., Trikalinos, T.A.: Class imbalance, redux. In: 2011 IEEE 11th International Conference on Data Mining, pp. 754\u2013763 (2011). Ieee","DOI":"10.1109\/ICDM.2011.33"},{"key":"1624_CR17","doi-asserted-by":"crossref","unstructured":"Park, S., Lim, J., Jeon, Y., Choi, J.Y.: Influence-balanced loss for imbalanced visual classification. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 735\u2013744 (2021)","DOI":"10.1109\/ICCV48922.2021.00077"},{"key":"1624_CR18","unstructured":"Menon, A.K., Jayasumana, S., Rawat, A.S., Jain, H., Veit, A., Kumar, S.: Long-tail learning via logit adjustment. In: International Conference on Learning Representations (2021)"},{"key":"1624_CR19","first-page":"1513","volume":"33","author":"K Tang","year":"2020","unstructured":"Tang, K., Huang, J., Zhang, H.: Long-tailed classification by keeping the good and removing the bad momentum causal effect. Adv. Neural. Inf. Process. Syst. 33, 1513\u20131524 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1624_CR20","doi-asserted-by":"crossref","unstructured":"Cai, J., Wang, Y., Hwang, J.-N.: Ace: Ally complementary experts for solving long-tailed recognition in one-shot. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 112\u2013121 (2021)","DOI":"10.1109\/ICCV48922.2021.00018"},{"key":"1624_CR21","unstructured":"Wang, X., Lian, L., Miao, Z., Liu, Z., Yu, S.: Long-tailed recognition by routing diverse distribution-aware experts. In: International Conference on Learning Representations (2021)"},{"key":"1624_CR22","doi-asserted-by":"crossref","unstructured":"Samuel, D., Chechik, G.: Distributional robustness loss for long-tail learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9495\u20139504 (2021)","DOI":"10.1109\/ICCV48922.2021.00936"},{"key":"1624_CR23","doi-asserted-by":"crossref","unstructured":"Yin, X., Yu, X., Sohn, K., Liu, X., Chandraker, M.: Feature transfer learning for face recognition with under-represented data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5704\u20135713 (2019)","DOI":"10.1109\/CVPR.2019.00585"},{"key":"1624_CR24","doi-asserted-by":"crossref","unstructured":"Zang, Y., Huang, C., Loy, C.C.: Fasa: Feature augmentation and sampling adaptation for long-tailed instance segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3457\u20133466 (2021)","DOI":"10.1109\/ICCV48922.2021.00344"},{"key":"1624_CR25","doi-asserted-by":"crossref","unstructured":"Moon, W., Seong, H.S., Heo, J.-P.: Minority-oriented vicinity expansion with attentive aggregation for video long-tailed recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 37, pp. 1931\u20131939 (2023)","DOI":"10.1609\/aaai.v37i2.25284"},{"key":"1624_CR26","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1624_CR27","unstructured":"Song, Y., Ermon, S.: Generative modeling by estimating gradients of the data distribution. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"1624_CR28","first-page":"12438","volume":"33","author":"Y Song","year":"2020","unstructured":"Song, Y., Ermon, S.: Improved techniques for training score-based generative models. Adv. Neural. Inf. Process. Syst. 33, 12438\u201312448 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1624_CR29","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"1624_CR30","unstructured":"Nichol, A., Dhariwal, P., Ramesh, A., Shyam, P., Mishkin, P., McGrew, B., Sutskever, I., Chen, M.: Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)"},{"key":"1624_CR31","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.061251(2), 3 (2022)"},{"key":"1624_CR32","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., Chan, W., Saxena, S., Li, L., Whang, J., Denton, E.L., Ghasemipour, K., Gontijo Lopes, R., Karagol Ayan, B., Salimans, T., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1624_CR33","unstructured":"Podell, D., English, Z., Lacey, K., Blattmann, A., Dockhorn, T., M\u00fcller, J., Penna, J., Rombach, R.: Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"1624_CR34","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"1624_CR35","doi-asserted-by":"crossref","unstructured":"Chen, N., Yue, J., Fang, L., Xia, S.: Spectraldiff: A generative framework for hyperspectral image classification with diffusion models. IEEE Trans. Geosci. Remote Sens. (2023)","DOI":"10.1109\/TGRS.2023.3310023"},{"key":"1624_CR36","unstructured":"Mukhopadhyay, S., Gwilliam, M., Agarwal, V., Padmanabhan, N., Swaminathan, A., Hegde, S., Zhou, T., Shrivastava, A.: Diffusion models beat gans on image classification. arXiv preprint arXiv:2307.08702 (2023)"},{"key":"1624_CR37","unstructured":"Jiang, Y., Chen, H., Ko, H.: Spatial-temporal transformer-guided diffusion based data augmentation for efficient skeleton-based action recognition. arXiv preprint arXiv:2302.13434 (2023)"},{"key":"1624_CR38","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-assisted intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18, pp. 234\u2013241 (2015). Springer","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"1624_CR39","unstructured":"Vaswani, A.: Attention is all you need. Adv. Neural Inf. Process. Syst. (2017)"},{"key":"1624_CR40","unstructured":"Yu, H., Tang, Y., Xie, E., Mao, J., Luo, P., Nie, Z.: Flow-based feature fusion for vehicle-infrastructure cooperative 3d object detection. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"1624_CR41","doi-asserted-by":"crossref","unstructured":"Li, H., Li, M., Peng, Q., Wang, S., Yu, H., Wang, Z.: Correlation-guided semantic consistency network for visible-infrared person re-identification. IEEE Trans. Circuits Syst. Video Technol. (2023)","DOI":"10.1109\/TCSVT.2023.3340225"},{"issue":"3","key":"1624_CR42","doi-asserted-by":"publisher","first-page":"854","DOI":"10.1007\/s11263-023-01879-7","volume":"132","author":"B Lai","year":"2024","unstructured":"Lai, B., Liu, M., Ryan, F., Rehg, J.M.: In the eye of transformer: global-local correlation for egocentric gaze estimation and beyond. Int. J. Comput. Vis. 132(3), 854\u2013871 (2024)","journal-title":"Int. J. Comput. Vis."},{"key":"1624_CR43","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wang, S., Li, H., Dou, Z., Li, J.: Graph-propagation based correlation learning for weakly supervised fine-grained image classification. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 12289\u201312296 (2020)","DOI":"10.1609\/aaai.v34i07.6912"},{"key":"1624_CR44","doi-asserted-by":"crossref","unstructured":"Wu, Z., Weng, Z., Peng, W., Yang, X., Li, A., Davis, L.S., Jiang, Y.-G.: Building an open-vocabulary video clip model with better architectures, optimization and data. IEEE Trans. Pattern Anal. Mach. Intell. (2024)","DOI":"10.1109\/TPAMI.2024.3357503"},{"key":"1624_CR45","doi-asserted-by":"crossref","unstructured":"Wang, S., Chang, J., Wang, Z., Li, H., Ouyang, W., Tian, Q.: Content-aware rectified activation for zero-shot fine-grained image retrieval. IEEE Trans. Pattern Anal. Mach. Intell. (2024)","DOI":"10.1109\/TPAMI.2024.3355461"},{"key":"1624_CR46","doi-asserted-by":"crossref","unstructured":"Han, P., Ye, C., Zhou, J., Zhang, J., Hong, J., Li, X.: Latent-based diffusion model for long-tailed recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2639\u20132648 (2024)","DOI":"10.1109\/CVPRW63382.2024.00270"},{"key":"1624_CR47","doi-asserted-by":"crossref","unstructured":"Sigurdsson, G.A., Varol, G., Wang, X., Farhadi, A., Laptev, I., Gupta, A.: Hollywood in homes: Crowdsourcing data collection for activity understanding. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14, pp. 510\u2013526 (2016). Springer","DOI":"10.1007\/978-3-319-46448-0_31"},{"key":"1624_CR48","doi-asserted-by":"crossref","unstructured":"Sigurdsson, G.A., Gupta, A., Schmid, C., Farhadi, A., Alahari, K.: Actor and observer: Joint modeling of first and third-person videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7396\u20137404 (2018)","DOI":"10.1109\/CVPR.2018.00772"},{"key":"1624_CR49","unstructured":"Cao, K., Wei, C., Gaidon, A., Arechiga, N., Ma, T.: Learning imbalanced datasets with label-distribution-aware margin loss. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"1624_CR50","doi-asserted-by":"crossref","unstructured":"Tan, J., Wang, C., Li, B., Li, Q., Ouyang, W., Yin, C., Yan, J.: Equalization loss for long-tailed object recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11662\u201311671 (2020)","DOI":"10.1109\/CVPR42600.2020.01168"},{"key":"1624_CR51","unstructured":"Kang, B., Xie, S., Rohrbach, M., Yan, Z., Gordo, A., Feng, J., Kalantidis, Y.: Decoupling representation and classifier for long-tailed recognition. In: International Conference on Learning Representations (2019)"},{"key":"1624_CR52","unstructured":"Zhang, H., Cisse, M., Dauphin, Y.N., Lopez-Paz, D.: mixup: Beyond empirical risk minimization. In: International Conference on Learning Representations (2018)"},{"key":"1624_CR53","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1624_CR54","doi-asserted-by":"crossref","unstructured":"Lin, J., Gan, C., Han, S.: Tsm: Temporal shift module for efficient video understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7083\u20137093 (2019)","DOI":"10.1109\/ICCV.2019.00718"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01624-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01624-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01624-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T11:00:05Z","timestamp":1740740405000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01624-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,6]]},"references-count":54,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["1624"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01624-1","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,6]]},"assertion":[{"value":"17 October 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 December 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"48"}}