{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T12:30:37Z","timestamp":1764937837127,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T00:00:00Z","timestamp":1729814400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,25]]},"DOI":"10.1145\/3704323.3704362","type":"proceedings-article","created":{"date-parts":[[2025,1,7]],"date-time":"2025-01-07T08:25:22Z","timestamp":1736238322000},"page":"427-433","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Large Language Model Data Augmentation for Text-Pair Classification Tasks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-2625-6538","authenticated-orcid":false,"given":"Yuyang","family":"Li","sequence":"first","affiliation":[{"name":"China University of Geosciences Beijing,School of Information Engineering, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9150-1369","authenticated-orcid":false,"given":"Yuqing","family":"Zhang","sequence":"additional","affiliation":[{"name":"China University of Geosciences Beijing,School of Information Engineering, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2136-114X","authenticated-orcid":false,"given":"Zelin","family":"Du","sequence":"additional","affiliation":[{"name":"China University of Geosciences Beijing,School of Information Engineering, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0393-9884","authenticated-orcid":false,"given":"Ziqi","family":"Guo","sequence":"additional","affiliation":[{"name":"China University of Geosciences Beijing,School of Information Engineering, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,1,7]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. Gpt-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_1_3_2","unstructured":"Ebtesam Almazrouei Hamza Alobeidli Abdulaziz Alshamsi Alessandro Cappelli Ruxandra Cojocaru M\u00e9rouane Debbah \u00c9tienne Goffinet Daniel Hesslow Julien Launay Quentin Malartic et\u00a0al. 2023. The falcon series of open language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.16867 (2023)."},{"key":"e_1_3_3_1_4_2","unstructured":"A Anaby-Tavor B Carmeli E Goldbraich A Kantor G Kour S Shlomov N Tepper and N Zwerdling. 1911. Not Enough Data? Deep Learning to the Rescue! arXiv 2019. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1911.03118 (1911)."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Samuel\u00a0R Bowman Gabor Angeli Christopher Potts and Christopher\u00a0D Manning. 2015. A large annotated corpus for learning natural language inference. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1508.05326 (2015).","DOI":"10.18653\/v1\/D15-1075"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Denny Britz Anna Goldie Minh-Thang Luong and Quoc Le. 2017. Massive exploration of neural machine translation architectures. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1703.03906 (2017).","DOI":"10.18653\/v1\/D17-1151"},{"key":"e_1_3_3_1_7_2","unstructured":"Daniel Cer Mona Diab Eneko Agirre Inigo Lopez-Gazpio and Lucia Specia. 2017. Semeval-2017 task 1: Semantic textual similarity-multilingual and cross-lingual focused evaluation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1708.00055 (2017)."},{"key":"e_1_3_3_1_8_2","unstructured":"Jacob Devlin. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.04805 (2018)."},{"key":"e_1_3_3_1_9_2","volume-title":"Third international workshop on paraphrasing (IWP2005)","author":"Dolan Bill","year":"2005","unstructured":"Bill Dolan and Chris Brockett. 2005. Automatically constructing a corpus of sentential paraphrases. In Third international workshop on paraphrasing (IWP2005)."},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1045"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Steven\u00a0Y Feng Varun Gangal Jason Wei Sarath Chandar Soroush Vosoughi Teruko Mitamura and Eduard Hovy. 2021. A survey of data augmentation approaches for NLP. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2105.03075 (2021).","DOI":"10.18653\/v1\/2021.findings-acl.84"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1555"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Hai Hu Kyle Richardson Liang Xu Lu Li Sandra Kuebler and Lawrence\u00a0S Moss. 2020. OCNLI: Original Chinese Natural Language Inference. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.05444 (2020).","DOI":"10.18653\/v1\/2020.findings-emnlp.314"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Melvin Johnson Mike Schuster Quoc\u00a0V Le Maxim Krikun Yonghui Wu Zhifeng Chen Nikhil Thorat Fernanda Vi\u00e9gas Martin Wattenberg Greg Corrado et\u00a0al. 2017. Google\u2019s multilingual neural machine translation system: Enabling zero-shot translation. Transactions of the Association for Computational Linguistics 5 (2017) 339\u2013351.","DOI":"10.1162\/tacl_a_00065"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Marcin Junczys-Dowmunt Roman Grundkiewicz Tomasz Dwojak Hieu Hoang Kenneth Heafield Tom Neckermann Frank Seide Ulrich Germann Alham\u00a0Fikri Aji Nikolay Bogoychev et\u00a0al. 2018. Marian: Fast neural machine translation in C++. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1804.00344 (2018).","DOI":"10.18653\/v1\/P18-4020"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.234"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-2072"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.26615\/978-954-452-049-6_049"},{"key":"e_1_3_3_1_19_2","unstructured":"Zhenzhong Lan Mingda Chen Sebastian Goodman Kevin Gimpel Piyush Sharma and Radu Soricut. 2019. Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1909.11942 (2019)."},{"key":"e_1_3_3_1_20_2","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1907.11692 (2019)."},{"key":"e_1_3_3_1_21_2","unstructured":"Yuyang Nie Yuanhe Tian Xiang Wan Yan Song and Bo Dai. 2020. Named entity recognition for social media texts with semantic augmentation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.15458 (2020)."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.600"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i09.7123"},{"key":"e_1_3_3_1_24_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et\u00a0al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Alex Wang Amanpreet Singh Julian Michael Felix Hill Omer Levy and Samuel\u00a0R Bowman. 2018. GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1804.07461 (2018).","DOI":"10.18653\/v1\/W18-5446"},{"key":"e_1_3_3_1_26_2","volume-title":"7th International Conference on Learning Representations, ICLR 2019","author":"Wang Alex","year":"2019","unstructured":"Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel\u00a0R Bowman. 2019. Glue: A multi-task benchmark and analysis platform for natural language understanding. In 7th International Conference on Learning Representations, ICLR 2019."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1670"},{"key":"e_1_3_3_1_28_2","unstructured":"Adina Williams Nikita Nangia and Samuel\u00a0R Bowman. 2017. A broad-coverage challenge corpus for sentence understanding through inference. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1704.05426 (2017)."},{"key":"e_1_3_3_1_29_2","unstructured":"Qizhe Xie Zihang Dai Eduard Hovy Thang Luong and Quoc Le. 2020. Unsupervised data augmentation for consistency training. Advances in neural information processing systems 33 (2020) 6256\u20136268."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.419"},{"key":"e_1_3_3_1_31_2","unstructured":"An Yang Baosong Yang Binyuan Hui Bo Zheng Bowen Yu Chang Zhou Chengpeng Li Chengyuan Li Dayiheng Liu Fei Huang et\u00a0al. 2024. Qwen2 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.10671 (2024)."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"Yiben Yang Chaitanya Malaviya Jared Fernandez Swabha Swayamdipta Ronan\u00a0Le Bras Ji-Ping Wang Chandra Bhagavatula Yejin Choi and Doug Downey. 2020. Generative data augmentation for commonsense reasoning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2004.11546 (2020).","DOI":"10.18653\/v1\/2020.findings-emnlp.90"},{"key":"e_1_3_3_1_33_2","volume-title":"International Conference on Learning Representations (ICLR)","author":"Yu Adams\u00a0Wei","year":"2018","unstructured":"Adams\u00a0Wei Yu, David Dohan, Minh-Thang Luong, Rui Zhao, Kai Chen, Mohammad Norouzi, and Quoc\u00a0V. Le. 2018. QANet: Combining Local Convolution with Global Self-Attention for Reading Comprehension. In International Conference on Learning Representations (ICLR)."}],"event":{"name":"ICCPR 2024: 2024 13th International Conference on Computing and Pattern Recognition","acronym":"ICCPR 2024","location":"Tianjin China"},"container-title":["Proceedings of the 2024 13th International Conference on Computing and Pattern Recognition"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3704323.3704362","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3704323.3704362","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:05Z","timestamp":1750295885000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3704323.3704362"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,25]]},"references-count":32,"alternative-id":["10.1145\/3704323.3704362","10.1145\/3704323"],"URL":"https:\/\/doi.org\/10.1145\/3704323.3704362","relation":{},"subject":[],"published":{"date-parts":[[2024,10,25]]},"assertion":[{"value":"2025-01-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}