{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T06:41:41Z","timestamp":1775544101316,"version":"3.50.1"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031282379","type":"print"},{"value":"9783031282386","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-28238-6_12","type":"book-chapter","created":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T17:03:18Z","timestamp":1678986198000},"page":"172-187","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Domain-Aligned Data Augmentation for\u00a0Low-Resource and\u00a0Imbalanced Text Classification"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6396-5374","authenticated-orcid":false,"given":"Nikolaos","family":"Stylianou","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9564-7100","authenticated-orcid":false,"given":"Despoina","family":"Chatzakou","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4148-9028","authenticated-orcid":false,"given":"Theodora","family":"Tsikrika","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2505-9178","authenticated-orcid":false,"given":"Stefanos","family":"Vrochidis","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6447-9020","authenticated-orcid":false,"given":"Ioannis","family":"Kompatsiaris","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,17]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Anaby-Tavor, A., et al.: Do not have enough data? Deep learning to the rescue! In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 7383\u20137390 (2020)","DOI":"10.1609\/aaai.v34i05.6233"},{"key":"12_CR2","unstructured":"Baldi, P., Sadowski, P.J.: Understanding dropout. In: Advances in Neural Information Processing Systems, vol. 26 (2013)"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Bayer, M., Kaufhold, M.A., Buchhold, B., Keller, M., Dallmeyer, J., Reuter, C.: Data augmentation in natural language processing: a novel text generation approach for long and short text classifiers. Int. J. Mach. Learn. Cybern., 1\u201316 (2022)","DOI":"10.1007\/s13042-022-01553-3"},{"issue":"7","key":"12_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3544558","volume":"55","author":"M Bayer","year":"2022","unstructured":"Bayer, M., Kaufhold, M.A., Reuter, C.: A survey on data augmentation for text classification. ACM Comput. Surv. 55(7), 1\u201339 (2022)","journal-title":"ACM Comput. Surv."},{"key":"12_CR5","unstructured":"Bommasani, R., et al.: On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)"},{"key":"12_CR6","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"8","key":"12_CR7","doi-asserted-by":"publisher","first-page":"2868","DOI":"10.1109\/TNNLS.2019.2899061","volume":"31","author":"D Brzezinski","year":"2019","unstructured":"Brzezinski, D., Stefanowski, J., Susmaga, R., Szczech, I.: On the dynamics of classification measures for imbalanced and streaming data. IEEE Trans. Neural Netw. Learn. Syst. 31(8), 2868\u20132878 (2019)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"12_CR8","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla, N.V., Bowyer, K.W., Hall, L.O., Kegelmeyer, W.P.: Smote: synthetic minority over-sampling technique. J. Artif. Intell. Res. 16, 321\u2013357 (2002)","journal-title":"J. Artif. Intell. Res."},{"key":"12_CR9","doi-asserted-by":"publisher","unstructured":"Chen, J., Shen, D., Chen, W., Yang, D.: HiddenCut: simple data augmentation for natural language understanding with better generalizability. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 4380\u20134390. Association for Computational Linguistics, August 2021. https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.338","DOI":"10.18653\/v1\/2021.acl-long.338"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Collins, E., Rozanov, N., Zhang, B.: Evolutionary data measures: understanding the difficulty of text classification tasks. In: Proceedings of the 22nd Conference on Computational Natural Language Learning, pp. 380\u2013391 (2018)","DOI":"10.18653\/v1\/K18-1037"},{"key":"12_CR11","unstructured":"Coucke, A., et al.: Snips voice platform: an embedded spoken language understanding system for private-by-design voice interfaces. arXiv preprint arXiv:1805.10190 (2018)"},{"key":"12_CR12","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186 (2019)"},{"key":"12_CR13","doi-asserted-by":"publisher","unstructured":"Feng, S.Y., Li, A.W., Hoey, J.: Keep calm and switch on! Preserving sentiment and fluency in semantic text exchange. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 2701\u20132711. Association for Computational Linguistics, Hong Kong, November 2019. https:\/\/doi.org\/10.18653\/v1\/D19-1272","DOI":"10.18653\/v1\/D19-1272"},{"key":"12_CR14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-98074-4","volume-title":"Learning from Imbalanced Data Sets","author":"A Fern\u00e1ndez","year":"2018","unstructured":"Fern\u00e1ndez, A., Garc\u00eda, S., Galar, M., Prati, R.C., Krawczyk, B., Herrera, F.: Learning from Imbalanced Data Sets. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-98074-4"},{"key":"12_CR15","unstructured":"Guo, H., Mao, Y., Zhang, R.: Augmenting data with Mixup for sentence classification: an empirical study. arXiv preprint arXiv:1905.08941 (2019)"},{"key":"12_CR16","unstructured":"Holtzman, A., Buys, J., Du, L., Forbes, M., Choi, Y.: The curious case of neural text degeneration. In: International Conference on Learning Representations (2020)"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Karimi, A., Rossi, L., Prati, A.: AEDA: An easier data augmentation technique for text classification. In: Findings of the Association for Computational Linguistics: EMNLP 2021, pp. 2748\u20132754. Association for Computational Linguistics, Punta Cana, November 2021","DOI":"10.18653\/v1\/2021.findings-emnlp.234"},{"key":"12_CR18","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"12_CR19","unstructured":"Kumar, V., Choudhary, A., Cho, E.: Data augmentation using pre-trained transformer models. In: Proceedings of the 2nd Workshop on Life-long Learning for Spoken Language Systems, pp. 18\u201326. Association for Computational Linguistics, Suzhou, December 2020"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Lewis, M., et al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 7871\u20137880 (2020)","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Li, X., Roth, D.: Learning question classifiers. In: COLING 2002: The 19th International Conference on Computational Linguistics (2002)","DOI":"10.3115\/1072228.1072378"},{"key":"12_CR22","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"12_CR23","doi-asserted-by":"publisher","first-page":"216","DOI":"10.1016\/j.patcog.2019.02.023","volume":"91","author":"A Luque","year":"2019","unstructured":"Luque, A., Carrasco, A., Mart\u00edn, A., de Las Heras, A.: The impact of class imbalance in classification performance metrics based on the binary confusion matrix. Pattern Recogn. 91, 216\u2013231 (2019)","journal-title":"Pattern Recogn."},{"key":"12_CR24","unstructured":"Maas, A.L., Daly, R.E., Pham, P.T., Huang, D., Ng, A.Y., Potts, C.: Learning word vectors for sentiment analysis. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, pp. 142\u2013150. Association for Computational Linguistics, Portland, June 2011"},{"key":"12_CR25","unstructured":"Mani, I., Zhang, I.: KNN approach to unbalanced data distributions: a case study involving information extraction. In: Proceedings of Workshop on Learning From Imbalanced Datasets, vol. 126, pp. 1\u20137. ICML (2003)"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Pang, B., Lee, L.: A sentimental education: Sentiment analysis using subjectivity summarization based on minimum cuts. In: Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics (ACL 2004), pp. 271\u2013278 (2004)","DOI":"10.3115\/1218955.1218990"},{"key":"12_CR27","unstructured":"Qu, Y., Shen, D., Shen, Y., Sajeev, S., Chen, W., Han, J.: CoDA: contrast-enhanced and diversity-promoting data augmentation for natural language understanding. In: International Conference on Learning Representations (2020)"},{"issue":"8","key":"12_CR28","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI blog 1(8), 9 (2019)","journal-title":"OpenAI blog"},{"key":"12_CR29","unstructured":"Rae, J.W., et al.: Scaling language models: methods, analysis & insights from training gopher. arXiv preprint arXiv:2112.11446 (2021)"},{"key":"12_CR30","unstructured":"Rosenfeld, J.S.: Scaling laws for deep learning. arXiv preprint arXiv:2108.07686 (2021)"},{"key":"12_CR31","unstructured":"Shen, D., Zheng, M., Shen, Y., Qu, Y., Chen, W.: A simple but tough-to-beat data augmentation approach for natural language understanding and generation. arXiv preprint arXiv:2009.13818 (2020)"},{"key":"12_CR32","unstructured":"Shleifer, S.: Low resource text classification with ULMFit and backtranslation. arXiv preprint arXiv:1903.09244 (2019)"},{"key":"12_CR33","unstructured":"Socher, R., et al.: Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 1631\u20131642 (2013)"},{"key":"12_CR34","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"12_CR35","doi-asserted-by":"publisher","unstructured":"Wei, J., Zou, K.: EDA: easy data augmentation techniques for boosting performance on text classification tasks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 6382\u20136388. Association for Computational Linguistics, Hong Kong, November 2019. https:\/\/doi.org\/10.18653\/v1\/D19-1670","DOI":"10.18653\/v1\/D19-1670"},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Wu, T., Ribeiro, M.T., Heer, J., Weld, D.S.: Polyjuice: generating counterfactuals for explaining, evaluating, and improving models. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 6707\u20136723 (2021)","DOI":"10.18653\/v1\/2021.acl-long.523"},{"key":"12_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1007\/978-3-030-22747-0_7","volume-title":"Computational Science \u2013 ICCS 2019","author":"X Wu","year":"2019","unstructured":"Wu, X., Lv, S., Zang, L., Han, J., Hu, S.: Conditional BERT contextual augmentation. In: Rodrigues, J.M.F., et al. (eds.) ICCS 2019. LNCS, vol. 11539, pp. 84\u201395. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-22747-0_7"},{"key":"12_CR38","unstructured":"Xie, Z., et al.: Data noising as smoothing in neural network language models. In: 5th International Conference on Learning Representations, ICLR 2017 (2017)"},{"key":"12_CR39","unstructured":"Yang, Z., Dai, Z., Yang, Y., Carbonell, J., Salakhutdinov, R.R., Le, Q.V.: XLNet: generalized autoregressive pretraining for language understanding. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019 Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 32. Curran Associates, Inc. (2019)"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-28238-6_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,5]],"date-time":"2024-03-05T13:46:12Z","timestamp":1709646372000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-28238-6_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031282379","9783031282386"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-28238-6_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dublin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ireland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 April 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 April 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"45","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecir2023.org\/index.html?v=1.0","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"489","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"77","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"83","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}