{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T10:05:54Z","timestamp":1779357954278,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T00:00:00Z","timestamp":1725926400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,9,10]]},"DOI":"10.1145\/3648188.3675153","type":"proceedings-article","created":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T18:22:56Z","timestamp":1724955776000},"page":"337-343","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Leveraging GPT for the Generation of Multi-Platform Social Media Datasets for Research"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-5076-8732","authenticated-orcid":false,"given":"Henry","family":"Tari","sequence":"first","affiliation":[{"name":"Maastricht University, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1480-4506","authenticated-orcid":false,"given":"M. Danial","family":"Khan","sequence":"additional","affiliation":[{"name":"Maastricht University, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8502-1957","authenticated-orcid":false,"given":"Justus","family":"Rutten","sequence":"additional","affiliation":[{"name":"Maastricht University, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8141-8073","authenticated-orcid":false,"given":"Darian","family":"Othman","sequence":"additional","affiliation":[{"name":"Maastricht University, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0897-4005","authenticated-orcid":false,"given":"Thales","family":"Bertaglia","sequence":"additional","affiliation":[{"name":"Utrecht University, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9200-7802","authenticated-orcid":false,"given":"Rishabh","family":"Kaushal","sequence":"additional","affiliation":[{"name":"Maastricht University, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2397-8963","authenticated-orcid":false,"given":"Adriana","family":"Iamnitchi","sequence":"additional","affiliation":[{"name":"Maastricht University, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,9,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Midterm Elections. Proceedings of the International AAAI Conference on Web and Social Media 17 (June","author":"Aiyappa Rachith","year":"2023","unstructured":"Rachith Aiyappa, Matthew\u00a0R. DeVerna, Manita Pote, Bao\u00a0Tran Truong, Wanying Zhao, David Axelrod, Aria Pessianzadeh, Zoher Kachwala, Munjung Kim, Ozgur\u00a0Can Seckin, Minsuk Kim, Sunny Gandhi, Amrutha Manikonda, Francesco Pierri, Filippo Menczer, and Kai-Cheng Yang. 2023. A Multi-Platform Collection of Social Media Posts about the 2022 U.S. Midterm Elections. Proceedings of the International AAAI Conference on Web and Social Media 17 (June 2023), 981\u2013989."},{"key":"e_1_3_2_1_2_1","volume-title":"TweetEval: Unified Benchmark and Comparative Evaluation for Tweet Classification. In Findings of the Association for Computational Linguistics: EMNLP","author":"Barbieri Francesco","year":"2020","unstructured":"Francesco Barbieri, Jose Camacho-Collados, Luis\u00a0Espinosa Anke, and Leonardo Neves. 2020. TweetEval: Unified Benchmark and Comparative Evaluation for Tweet Classification. In Findings of the Association for Computational Linguistics: EMNLP 2020. ACL Anthology, online, 1644\u20131650."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v18i1.31303"},{"key":"e_1_3_2_1_4_1","volume-title":"OffLanDat: A Community Based Implicit Offensive Language Dataset Generated by Large Language Model Through Prompt Engineering. arXiv preprint arXiv:2403.02472 3, 02472","author":"Das Amit","year":"2024","unstructured":"Amit Das, Mostafa Rahgouy, Dongji Feng, Zheng Zhang, Tathagata Bhattacharya, Nilanjana Raychawdhary, Mary Sandage, Lauramarie Pope, Gerry Dozier, and Cheryl Seals. 2024. OffLanDat: A Community Based Implicit Offensive Language Dataset Generated by Large Language Model Through Prompt Engineering. arXiv preprint arXiv:2403.02472 3, 02472 (2024), 1\u201315."},{"key":"e_1_3_2_1_5_1","volume-title":"Down the Toxicity Rabbit Hole: A Novel Framework to Bias Audit Large Language Models. arXiv e-prints 1, 2309","author":"Dutta Arka","year":"2023","unstructured":"Arka Dutta, Adel Khorramrouz, Sujan Dutta, and Ashiqur\u00a0R KhudaBukhsh. 2023. Down the Toxicity Rabbit Hole: A Novel Framework to Bias Audit Large Language Models. arXiv e-prints 1, 2309 (2023), arXiv\u20132309."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3358206"},{"key":"e_1_3_2_1_7_1","volume-title":"BERTopic: Neural topic modeling with a class-based TF-IDF procedure. arXiv preprint arXiv:2203.05794 1, 1","author":"Grootendorst Maarten","year":"2022","unstructured":"Maarten Grootendorst. 2022. BERTopic: Neural topic modeling with a class-based TF-IDF procedure. arXiv preprint arXiv:2203.05794 1, 1 (2022), 1\u201310."},{"key":"e_1_3_2_1_8_1","volume-title":"Toxigen: A large-scale machine-generated dataset for adversarial and implicit hate speech detection. arXiv preprint arXiv:2203.09509 03, 09509","author":"Hartvigsen Thomas","year":"2022","unstructured":"Thomas Hartvigsen, Saadia Gabriel, Hamid Palangi, Maarten Sap, Dipankar Ray, and Ece Kamar. 2022. Toxigen: A large-scale machine-generated dataset for adversarial and implicit hate speech detection. arXiv preprint arXiv:2203.09509 03, 09509 (2022), 1\u201313."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocac114"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aaz8170"},{"key":"e_1_3_2_1_11_1","volume-title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach. ArXiv abs\/1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. ArXiv abs\/1907.11692 (2019), 1\u201313."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-demo.25"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1080\/10584609.2019.1661889"},{"key":"e_1_3_2_1_14_1","volume-title":"Is a prompt and a few samples all you need? Using GPT-4 for data augmentation in low-resource classification tasks. arXiv preprint arXiv:2304.13861 04, 13861","author":"M\u00f8ller Anders\u00a0Giovanni","year":"2023","unstructured":"Anders\u00a0Giovanni M\u00f8ller, Jacob\u00a0Aarup Dalsgaard, Arianna Pera, and Luca\u00a0Maria Aiello. 2023. Is a prompt and a few samples all you need? Using GPT-4 for data augmentation in low-resource classification tasks. arXiv preprint arXiv:2304.13861 04, 13861 (2023), 1\u201312."},{"key":"e_1_3_2_1_15_1","volume-title":"Text and code embeddings by contrastive pre-training. arXiv preprint arXiv:2201.10005 01, 10005","author":"Neelakantan Arvind","year":"2022","unstructured":"Arvind Neelakantan, Tao Xu, Raul Puri, Alec Radford, Jesse\u00a0Michael Han, Jerry Tworek, Qiming Yuan, Nikolas Tezak, Jong\u00a0Wook Kim, Chris Hallacy, 2022. Text and code embeddings by contrastive pre-training. arXiv preprint arXiv:2201.10005 01, 10005 (2022), 1\u201313."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615121"},{"key":"e_1_3_2_1_17_1","volume-title":"Claudia Wagner, and Arnim Bleier.","author":"Schoch David","year":"2023","unstructured":"David Schoch, Chung hong Chan, Claudia Wagner, and Arnim Bleier. 2023. Computational Reproducibility in Computational Social Science. arxiv:2307.01918\u00a0[cs.CY]"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3328529.3328562"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v8i1.14517"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-020-09488-3"},{"key":"e_1_3_2_1_21_1","volume-title":"Instagram Direct Messaging: A Neural Topic Modeling Approach. Social Media+ Society 10, 1","author":"Verbeij Tim","year":"2024","unstructured":"Tim Verbeij, Ine Beyens, Damian Trilling, and Patti\u00a0M Valkenburg. 2024. Happiness and Sadness in Adolescents\u2019 Instagram Direct Messaging: A Neural Topic Modeling Approach. Social Media+ Society 10, 1 (2024), 20563051241229655."},{"key":"e_1_3_2_1_22_1","volume-title":"Generating faithful synthetic data with large language models: A case study in computational social science. arXiv preprint arXiv:2305.15041 05, 15041","author":"Veselovsky Veniamin","year":"2023","unstructured":"Veniamin Veselovsky, Manoel\u00a0Horta Ribeiro, Akhil Arora, Martin Josifoski, Ashton Anderson, and Robert West. 2023. Generating faithful synthetic data with large language models: A case study in computational social science. arXiv preprint arXiv:2305.15041 05, 15041 (2023), 1\u20138."},{"key":"e_1_3_2_1_23_1","first-page":"5776","article-title":"Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers","volume":"33","author":"Wang Wenhui","year":"2020","unstructured":"Wenhui Wang, Furu Wei, Li Dong, Hangbo Bao, Nan Yang, and Ming Zhou. 2020. Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers. Advances in Neural Information Processing Systems 33 (2020), 5776\u20135788.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","volume-title":"Comprehensive Assessment of Toxicity in ChatGPT. arXiv preprint arXiv:2311.14685 11, 14685","author":"Zhang Boyang","year":"2023","unstructured":"Boyang Zhang, Xinyue Shen, Wai\u00a0Man Si, Zeyang Sha, Zeyuan Chen, Ahmed Salem, Yun Shen, Michael Backes, and Yang Zhang. 2023. Comprehensive Assessment of Toxicity in ChatGPT. arXiv preprint arXiv:2311.14685 11, 14685 (2023), 1\u201311."}],"event":{"name":"HT '24: 35th ACM Conference on Hypertext and Social Media","location":"Poznan Poland","acronym":"HT '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the 35th ACM Conference on Hypertext and Social Media"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3648188.3675153","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3648188.3675153","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:55:38Z","timestamp":1755878138000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3648188.3675153"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,10]]},"references-count":24,"alternative-id":["10.1145\/3648188.3675153","10.1145\/3648188"],"URL":"https:\/\/doi.org\/10.1145\/3648188.3675153","relation":{},"subject":[],"published":{"date-parts":[[2024,9,10]]},"assertion":[{"value":"2024-09-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}