{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T00:15:41Z","timestamp":1758845741818,"version":"3.44.0"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Data Sci Anal"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s41060-024-00674-y","type":"journal-article","created":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T14:06:44Z","timestamp":1730729204000},"page":"3495-3512","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Handling imbalanced textual data: an attention-based data augmentation approach"],"prefix":"10.1007","volume":"20","author":[{"given":"Amit Kumar","family":"Sah","sequence":"first","affiliation":[]},{"given":"Muhammad","family":"Abulaish","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,4]]},"reference":[{"key":"674_CR1","doi-asserted-by":"publisher","unstructured":"Abulaish, M., Sah, AK.: A text data augmentation approach for improving the performance of CNN. In: 2019 11th International Conference on Communication Systems & Networks (COMSNETS). IEEE, Bangalore, India, pp. 625\u2013630 (2019). https:\/\/doi.org\/10.1109\/COMSNETS.2019.8711054","DOI":"10.1109\/COMSNETS.2019.8711054"},{"key":"674_CR2","doi-asserted-by":"publisher","unstructured":"Aburass, S.: Quantifying Overfitting: Introducing the Overfitting Index. arXiv preprint arXiv:2308.08682 (2023). https:\/\/doi.org\/10.48550\/arXiv.1409.0473","DOI":"10.48550\/arXiv.1409.0473"},{"key":"674_CR3","doi-asserted-by":"publisher","unstructured":"Bahdanau, D.: Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014). https:\/\/doi.org\/10.48550\/arXiv.1409.0473","DOI":"10.48550\/arXiv.1409.0473"},{"key":"674_CR4","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla, N.V., Bowyer, K.W., Hall, L.O., Kegelmeyer, W.P.: SMOTE: synthetic minority over-sampling technique. J. Artif. Intell. Res. 16, 321\u2013357 (2002). https:\/\/doi.org\/10.1613\/jair.953","journal-title":"J. Artif. Intell. Res."},{"key":"674_CR5","doi-asserted-by":"publisher","unstructured":"Chen, C., Shu, K.: PromptDA: label-guided data augmentation for prompt-based few shot learners. In: Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics (EACL), Dubrovnik, Croatia; pp. 562\u2013574 (2023). https:\/\/doi.org\/10.18653\/v1\/2023.eacl-main.41","DOI":"10.18653\/v1\/2023.eacl-main.41"},{"issue":"9","key":"674_CR6","doi-asserted-by":"publisher","first-page":"6390","DOI":"10.1109\/TNNLS.2021.3136503","volume":"34","author":"D Dablain","year":"2022","unstructured":"Dablain, D., Krawczyk, B., Chawla, N.V.: DeepSMOTE: fusing deep learning and SMOTE for imbalanced data. IEEE Trans. Neural Netw. Learn. Syst. 34(9), 6390\u20136404 (2022). https:\/\/doi.org\/10.1109\/TNNLS.2021.3136503","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"674_CR7","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M-W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT), Minneapolis, MN, USA, pp. 4171\u20134186 (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"674_CR8","doi-asserted-by":"publisher","first-page":"4211","DOI":"10.1109\/TIFS.2021.3102498","volume":"16","author":"M Fazil","year":"2021","unstructured":"Fazil, M., Sah, A.K., Abulaish, M.: DeepSBD: a deep neural network model with attention mechanism for SocialBot detection. IEEE Trans. Inf. Forensics Secur. 16, 4211\u20134223 (2021). https:\/\/doi.org\/10.1109\/TIFS.2021.3102498","journal-title":"IEEE Trans. Inf. Forensics Secur."},{"issue":"1","key":"674_CR9","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1016\/j.patrec.2008.08.010","volume":"30","author":"C Ferri","year":"2009","unstructured":"Ferri, C., Hern\u00e1ndez-Orallo, J., Modroiu, R.: An experimental comparison of performance measures for classification. Pattern Recognit. Lett. 30(1), 27\u201338 (2009). https:\/\/doi.org\/10.1016\/j.patrec.2008.08.010","journal-title":"Pattern Recognit. Lett."},{"key":"674_CR10","doi-asserted-by":"publisher","unstructured":"Gupta, R.: Data augmentation for low resource sentiment analysis using generative adversarial networks. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, Brighton, United Kingdom, pp. 7380\u20137384 (2019). https:\/\/doi.org\/10.1109\/ICASSP.2019.8682544","DOI":"10.1109\/ICASSP.2019.8682544"},{"issue":"9","key":"674_CR11","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He, H., Garcia, E.A.: Learning from imbalanced data. IEEE Trans. Knowl. Data Eng. 21(9), 1263\u20131284 (2009). https:\/\/doi.org\/10.1109\/TKDE.2008.239","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"674_CR12","doi-asserted-by":"publisher","unstructured":"He, H., Bai, Y., Garcia, EA., Li, S.: ADASYN: adaptive synthetic sampling approach for imbalanced learning. In: 2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence), Hong Kong, China, pp. 1322\u20131328 (2008). https:\/\/doi.org\/10.1109\/IJCNN.2008.4633969","DOI":"10.1109\/IJCNN.2008.4633969"},{"key":"674_CR13","doi-asserted-by":"publisher","unstructured":"He, R., McAuley, J.: Ups and downs: modeling the visual evolution of fashion trends with one-class collaborative filtering. In: Proceedings of the 25th International Conference on World Wide Web (WWW), Montr\u00e9al Qu\u00e9bec, Canada, pp. 507\u2013517 (2016). https:\/\/doi.org\/10.1145\/2872427.2883037","DOI":"10.1145\/2872427.2883037"},{"issue":"9","key":"674_CR14","doi-asserted-by":"publisher","first-page":"4332","DOI":"10.1109\/TNNLS.2021.3056664","volume":"33","author":"F Huang","year":"2021","unstructured":"Huang, F., Li, X., Yuan, C., Zhang, S., Zhang, J., Qiao, S.: Attention-emotion-enhanced convolutional LSTM for sentiment analysis. IEEE Trans. Neural Netw. Learn. Syst. 33(9), 4332\u201345 (2021). https:\/\/doi.org\/10.1109\/TNNLS.2021.3056664","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"674_CR15","doi-asserted-by":"publisher","unstructured":"Kobayashi, S.: Contextual augmentation: data augmentation by words with paradigmatic relations. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT), New Orleans, Louisiana, USA, pp. 452\u2013457 (2018). https:\/\/doi.org\/10.18653\/v1\/N18-2072","DOI":"10.18653\/v1\/N18-2072"},{"key":"674_CR16","doi-asserted-by":"publisher","unstructured":"Kong, F., Zhang, R., Guo, X., Mensah, S., Mao, Y.: DropMix: a textual data augmentation combining dropout with mixup. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing (EMNLP), Abu Dhabi, UAE, pp. 890\u2013899 (2022). https:\/\/doi.org\/10.18653\/v1\/2022.emnlp-main.57","DOI":"10.18653\/v1\/2022.emnlp-main.57"},{"key":"674_CR17","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, Nevada, USA (2012). https:\/\/papers.nips.cc\/paper_files\/paper\/2012\/hash\/c399862d3b9d6b76c8436e924a68c45b-Abstract.html"},{"issue":"11","key":"674_CR18","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998). https:\/\/doi.org\/10.1109\/5.726791","journal-title":"Proc. IEEE"},{"issue":"7553","key":"674_CR19","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u201344 (2015). https:\/\/doi.org\/10.1038\/nature14539","journal-title":"Nature"},{"key":"674_CR20","doi-asserted-by":"publisher","unstructured":"Liu, R., Xu, G., Jia, C., Ma, W., Wang, L., Vosoughi, S.: Data Boost: text data augmentation through reinforcement learning guided conditional generation. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 9031\u20139041 (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.726","DOI":"10.18653\/v1\/2020.emnlp-main.726"},{"key":"674_CR21","doi-asserted-by":"publisher","unstructured":"McCoy, T., Pavlick, E., Linzen, T.: Right for the wrong reasons: diagnosing syntactic heuristics in natural language inference. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics (ACL), Florence, Italy, pp 3428\u20133448 (2019). https:\/\/doi.org\/10.18653\/v1\/P19-1334","DOI":"10.18653\/v1\/P19-1334"},{"key":"674_CR22","doi-asserted-by":"publisher","unstructured":"Min, J., McCoy, R.T., Das, D., Pitler, E., Linzen, T.: Syntactic data augmentation increases robustness to inference heuristics. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (ACL), pp. 2339\u20132352 (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.212","DOI":"10.18653\/v1\/2020.acl-main.212"},{"key":"674_CR23","doi-asserted-by":"publisher","unstructured":"Perez, L.: The effectiveness of data augmentation in image classification using deep learning. arXiv preprint arXiv:1712.04621 (2017). https:\/\/doi.org\/10.48550\/arXiv.1712.04621","DOI":"10.48550\/arXiv.1712.04621"},{"key":"674_CR24","doi-asserted-by":"publisher","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: sentence embeddings using siamese BERT-networks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics, Hong Kong, China, pp. 3982\u20133992 (2019). https:\/\/doi.org\/10.18653\/v1\/D19-1410","DOI":"10.18653\/v1\/D19-1410"},{"key":"674_CR25","unstructured":"Sah, A., Abulaish, M.: DeepADA: an attention-based deep learning framework for augmenting imbalanced textual datasets. In: Proceedings of the 19th International Conference on Natural Language Processing (ICON), New Delhi, India, pp. 318\u2013327 (2022). https:\/\/aclanthology.org\/2022.icon-main.38\/"},{"key":"674_CR26","doi-asserted-by":"publisher","unstructured":"Sah, A.K., Abulaish, M.: ADA: an attention-based data augmentation approach to handle imbalanced textual datasets. In: International Conference on Neural Information Processing (ICONIP), pp. 477\u2013488(2022). https:\/\/doi.org\/10.1007\/978-981-99-1639-9_40","DOI":"10.1007\/978-981-99-1639-9_40"},{"key":"674_CR27","doi-asserted-by":"publisher","unstructured":"Sennrich, R., Haddow, B., Birch, A.: Improving neural machine translation models with monolingual data. In: Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (ACL), Berlin, Germany, pp. 86\u201396 (2016). https:\/\/doi.org\/10.18653\/v1\/P16-1009","DOI":"10.18653\/v1\/P16-1009"},{"key":"674_CR28","doi-asserted-by":"publisher","unstructured":"Stylianou, N., Chatzakou, D., Tsikrika, T., Vrochidis, S., Kompatsiaris, I.: Domain-aligned data augmentation for low-resource and imbalanced text classification. In: Proceedings of the 45th European Conference on Information Retrieval (ECIR), Dublin, Ireland, pp. 172\u2013187 (2023). https:\/\/doi.org\/10.1007\/978-3-031-28238-6_12","DOI":"10.1007\/978-3-031-28238-6_12"},{"key":"674_CR29","doi-asserted-by":"publisher","unstructured":"Sun, X., Lu, W.: Understanding attention for text classification. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (ACL), pp. 3418\u20133428 (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.312","DOI":"10.18653\/v1\/2020.acl-main.312"},{"key":"674_CR30","doi-asserted-by":"publisher","unstructured":"Tang, D., Qin, B., Liu, T.: Document modeling with gated recurrent neural network for sentiment classification. In: Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP), Lisbon, Portugal, pp. 1422\u20131432 (2015). https:\/\/doi.org\/10.18653\/v1\/D15-1167","DOI":"10.18653\/v1\/D15-1167"},{"key":"674_CR31","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Proceedings of the 31st Conference on Neural Information Processing Systems, CA, USA; vol 30 https:\/\/papers.nips.cc\/paper_files\/paper\/2017\/hash\/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html (2017)"},{"key":"674_CR32","doi-asserted-by":"publisher","unstructured":"Wei, J., Zou, K.: EDA: easy data augmentation techniques for boosting performance on text classification tasks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). November, Hong Kong, China, pp. 6382\u20136388 (2019). https:\/\/doi.org\/10.18653\/v1\/D19-1670","DOI":"10.18653\/v1\/D19-1670"},{"key":"674_CR33","doi-asserted-by":"publisher","unstructured":"Wu, X., Lv, S., Zang, L., Han, J., Hu, S.: Conditional BERT contextual augmentation. In: Proceedings of the 19th International Conference on Computational Science, Algarve, Portugal, pp. 84\u201395 (2019). https:\/\/doi.org\/10.1007\/978-3-030-22747-0_7","DOI":"10.1007\/978-3-030-22747-0_7"},{"key":"674_CR34","doi-asserted-by":"publisher","unstructured":"Yang, Z., Yang, D., Dyer, C., He, X., Smola, A., Hovy, E.: Hierarchical attention networks for document classification. In: Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. Association for Computational Linguistics (NAACL-HLT), San Diego, California, USA, pp. 1480\u20131489 (2016). https:\/\/doi.org\/10.18653\/v1\/N16-1174","DOI":"10.18653\/v1\/N16-1174"},{"issue":"7","key":"674_CR35","doi-asserted-by":"publisher","first-page":"2805","DOI":"10.1007\/s10115-023-01853-2","volume":"65","author":"Z Yang","year":"2023","unstructured":"Yang, Z., Sinnott, R.O., Bailey, J., Ke, Q.: A survey of automated data augmentation algorithms for deep learning-based image classification tasks. Knowl. Inf. Syst. 65(7), 2805\u201361 (2023). https:\/\/doi.org\/10.1007\/s10115-023-01853-2","journal-title":"Knowl. Inf. Syst."},{"key":"674_CR36","unstructured":"Zhang, X., Zhao, J., LeCun, Y.: Character-level convolutional networks for text classification. In: Advances in Neural Information Processing Systems, Quebec, Canada, pp. 649\u2013657 https:\/\/papers.nips.cc\/paper_files\/paper\/2015\/hash\/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html (2015)"},{"key":"674_CR37","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.115922","volume":"187","author":"X Zhang","year":"2022","unstructured":"Zhang, X., Xu, J., Soh, C., Chen, L.: LA-HCN: label-based attention for hierarchical multi-label text classification neural network. Expert Syst. Appl. 187, 115922 (2022). https:\/\/doi.org\/10.1016\/j.eswa.2021.115922","journal-title":"Expert Syst. Appl."},{"key":"674_CR38","doi-asserted-by":"publisher","unstructured":"Zhou, C., Sun, C., Liu, Z., Lau, F.: A C-LSTM neural network for text classification. arXiv preprint arXiv:1511.08630 (2015). https:\/\/doi.org\/10.48550\/arXiv.1511.08630","DOI":"10.48550\/arXiv.1511.08630"}],"container-title":["International Journal of Data Science and Analytics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-024-00674-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s41060-024-00674-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-024-00674-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T10:52:51Z","timestamp":1758797571000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s41060-024-00674-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,4]]},"references-count":38,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["674"],"URL":"https:\/\/doi.org\/10.1007\/s41060-024-00674-y","relation":{},"ISSN":["2364-415X","2364-4168"],"issn-type":[{"type":"print","value":"2364-415X"},{"type":"electronic","value":"2364-4168"}],"subject":[],"published":{"date-parts":[[2024,11,4]]},"assertion":[{"value":"4 February 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}