{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T22:56:19Z","timestamp":1773442579400,"version":"3.50.1"},"reference-count":64,"publisher":"Association for Computing Machinery (ACM)","issue":"5","license":[{"start":{"date-parts":[[2024,6,4]],"date-time":"2024-06-04T00:00:00Z","timestamp":1717459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Dalian Excellent Young Project","award":["2022RY35"],"award-info":[{"award-number":["2022RY35"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["No. 61902050, No. 62002039"],"award-info":[{"award-number":["No. 61902050, No. 62002039"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100005047","name":"Natural Science Foundation of Liaoning Province","doi-asserted-by":"crossref","award":["No. 2021-MS-136"],"award-info":[{"award-number":["No. 2021-MS-136"]}],"id":[{"id":"10.13039\/501100005047","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":["ACM Trans. Softw. Eng. Methodol."],"published-print":{"date-parts":[[2024,6,30]]},"abstract":"<jats:p>\n            Online chatrooms serve as vital platforms for information exchange among software developers. With multiple developers engaged in rapid communication and diverse conversation topics, the resulting chat messages often manifest complexity and lack structure. To enhance the efficiency of extracting information from chat\n            <jats:italic>threads<\/jats:italic>\n            , automatic mining techniques are introduced for thread classification. However, previous approaches still grapple with unsatisfactory classification accuracy due to two primary challenges that they struggle to adequately capture long-distance dependencies within chat threads and address the issue of category imbalance in labeled datasets. To surmount these challenges, we present a topic classification approach for chat information types named EAEChat. Specifically, EAEChat comprises three core components: the text feature encoding component captures contextual text features using a multi-head self-attention mechanism-based text feature encoder, and a siamese network is employed to mitigate overfitting caused by limited data; the data augmentation component expands a small number of categories in the training dataset using a technique tailored to developer chat messages, effectively tackling the challenge of imbalanced category distribution; the non-text feature encoding component employs a feature fusion model to integrate deep text features with manually extracted non-text features. Evaluation across three real-world projects demonstrates that EAEChat, respectively, achieves an average precision, recall, and F1-score of 0.653, 0.651, and 0.644, and it marks a significant 7.60% improvement over the state-of-the-art approaches. These findings confirm the effectiveness of our method in proficiently classifying developer chat messages in online chatrooms.\n          <\/jats:p>","DOI":"10.1145\/3643677","type":"journal-article","created":{"date-parts":[[2024,1,29]],"date-time":"2024-01-29T12:57:22Z","timestamp":1706533042000},"page":"1-32","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Analyzing and Detecting Information Types of Developer Live Chat Threads"],"prefix":"10.1145","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-6660-9947","authenticated-orcid":false,"given":"Xiuwei","family":"Shang","sequence":"first","affiliation":[{"name":"Dalian Maritime University, University of Science and Technology of China, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4216-3788","authenticated-orcid":false,"given":"Shuai","family":"Zhang","sequence":"additional","affiliation":[{"name":"Dalian Maritime University, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2824-118X","authenticated-orcid":false,"given":"Yitong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Dalian Maritime University, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8554-6365","authenticated-orcid":false,"given":"Shikai","family":"Guo","sequence":"additional","affiliation":[{"name":"Dalian Maritime University, The Dalian Key Laboratory of Artificial Intelligence, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0003-9653","authenticated-orcid":false,"given":"Yulong","family":"Li","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5848-6398","authenticated-orcid":false,"given":"Rong","family":"Chen","sequence":"additional","affiliation":[{"name":"Dalian Maritime University, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1923-0669","authenticated-orcid":false,"given":"Hui","family":"Li","sequence":"additional","affiliation":[{"name":"Dalian Maritime University, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5068-1938","authenticated-orcid":false,"given":"Xiaochen","family":"Li","sequence":"additional","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8674-4948","authenticated-orcid":false,"given":"He","family":"Jiang","sequence":"additional","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,4]]},"reference":[{"key":"e_1_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/2818052.2869117"},{"key":"e_1_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/2593882.2593887"},{"key":"e_1_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3183440.3194951"},{"key":"e_1_3_1_5_2","unstructured":"GitLab. 2014. Gitter. Retrieved from https:\/\/gitter.im\/"},{"key":"e_1_3_1_6_2","unstructured":"Slack Technologies. 2013. Slack. Retrieved from https:\/\/slack.com\/"},{"key":"e_1_3_1_7_2","unstructured":"Freenode. 1995. Freenode. Retrieved from https:\/\/freenode.net\/"},{"key":"e_1_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/MSR.2019.00075"},{"key":"e_1_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3412378"},{"key":"e_1_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Lin Shi Mingzhe Xing Mingyang Li Yawen Wang Shoubin Li and Qing Wang. 2020. Detection of hidden feature requests from massive chat messages via deep siamese network. In IEEE\/ACM 42nd International Conference on Software Engineering (ICSE\u201920). IEEE 641\u2013653.","DOI":"10.1145\/3377811.3380356"},{"key":"e_1_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2020.110852"},{"key":"e_1_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/SANER.2018.8330223"},{"key":"e_1_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00058"},{"key":"e_1_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Eduard C. Groen Norbert Seyff Raian Ali Fabiano Dalpiaz Joerg Doerr Emitza Guzman Mahmood Hosseini Jordi Marco Marc Oriol Anna Perini and Stade Melanie. 2017. The crowd in requirements engineering: The landscape and challenges. IEEE Softw. 34 2 (2017) 44\u201352.","DOI":"10.1109\/MS.2017.33"},{"key":"e_1_3_1_15_2","article-title":"A large-scale corpus for conversation disentanglement","author":"Kummerfeld Jonathan K.","year":"2018","unstructured":"Jonathan K. Kummerfeld, Sai R. Gouravajhala, Joseph Peper, Vignesh Athreya, Chulaka Gunasekara, Jatin Ganhotra, Siva Sankalp Patel, Lazaros Polymenakos, and Walter S. Lasecki. 2018. A large-scale corpus for conversation disentanglement. arXiv preprint arXiv:1810.11118 (2018).","journal-title":"arXiv preprint arXiv:1810.11118"},{"key":"e_1_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"e_1_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/ASE51524.2021.9678923"},{"key":"e_1_3_1_18_2","article-title":"Structural characterization for dialogue disentanglement","author":"Ma Xinbei","year":"2022","unstructured":"Xinbei Ma, Zhuosheng Zhang, and Hai Zhao. 2022. Structural characterization for dialogue disentanglement. arXiv preprint arXiv:2110.08018 (2022).","journal-title":"arXiv preprint arXiv:2110.08018"},{"key":"e_1_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10191577"},{"key":"e_1_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-023-05151-w"},{"key":"e_1_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1068"},{"key":"e_1_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/S19-2191"},{"key":"e_1_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3450503"},{"key":"e_1_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.114751"},{"key":"e_1_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/2930238.2930285"},{"key":"e_1_3_1_27_2","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. BERT: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018).","journal-title":"arXiv preprint arXiv:1810.04805"},{"key":"e_1_3_1_28_2","unstructured":"Google. 2019. BERT-Small on HuggingFace. Retrieved from https:\/\/huggingface.co\/google\/bert_uncased_L-4_H-256_A-4"},{"key":"e_1_3_1_29_2","doi-asserted-by":"crossref","unstructured":"Matthew E. Peters Mark Neumann Mohit Iyyer Matt Gardner Christopher Clark Kenton Lee and Luke Zettlemoyer. 2018. Deep contextualized word representations. CoRR abs\/1802.05365 (2018).","DOI":"10.18653\/v1\/N18-1202"},{"key":"e_1_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.202"},{"key":"e_1_3_1_31_2","first-page":"1","article-title":"A client-centric evaluation system to evaluate guest\u2019s satisfaction on AirBNB using machine learning and NLP","author":"Chiny Mohamed","year":"2021","unstructured":"Mohamed Chiny, Omar Bencharef, Moulay Youssef Hadi, and Younes Chihab. 2021. A client-centric evaluation system to evaluate guest\u2019s satisfaction on AirBNB using machine learning and NLP. Appl. Computat. Intell. Soft Comput. 2021 (2021), 1\u201314.","journal-title":"Appl. Computat. Intell. Soft Comput."},{"key":"e_1_3_1_32_2","first-page":"6256","article-title":"Unsupervised data augmentation for consistency training","volume":"33","author":"Xie Qizhe","year":"2020","unstructured":"Qizhe Xie, Zihang Dai, Eduard Hovy, Thang Luong, and Quoc Le. 2020. Unsupervised data augmentation for consistency training. Adv. Neural Inf. Process. Syst. 33 (2020), 6256\u20136268.","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"e_1_3_1_33_2","article-title":"Attention is all you need","volume":"30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"e_1_3_1_34_2","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans and Ilya Sutskever. 2018. Improving language understanding by generative pre-training. Retrieved from https:\/\/api.semanticscholar.org\/CorpusID:49313245"},{"key":"e_1_3_1_35_2","first-page":"1188","volume-title":"International Conference on Machine Learning","author":"Le Quoc","year":"2014","unstructured":"Quoc Le and Tomas Mikolov. 2014. Distributed representations of sentences and documents. In International Conference on Machine Learning. PMLR, 1188\u20131196."},{"key":"e_1_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.45"},{"key":"e_1_3_1_37_2","article-title":"Boosting source code learning with data augmentation: An empirical study","author":"Dong Zeming","year":"2023","unstructured":"Zeming Dong, Qiang Hu, Yuejun Guo, Zhenya Zhang, Maxime Cordy, Mike Papadakis, Yves Le Traon, and Jianjun Zhao. 2023. Boosting source code learning with data augmentation: An empirical study. arXiv preprint arXiv:2303.06808 (2023).","journal-title":"arXiv preprint arXiv:2303.06808"},{"key":"e_1_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2017.13"},{"key":"e_1_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3236024.3236031"},{"key":"e_1_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2013.2297712"},{"key":"e_1_3_1_41_2","unstructured":"Gitter. 2014. Angular Chatroom on Gitter. Retrieved from https:\/\/gitter.im\/angular\/angular"},{"key":"e_1_3_1_42_2","unstructured":"Gitter. 2014. Deeplearning4j chatroom on Gitter. Retrieved from https:\/\/gitter.im\/eclipse\/deeplearning4j"},{"key":"e_1_3_1_43_2","unstructured":"Gitter. 2014. Spring-boot chatroom on Gitter. Retrieved from https:\/\/gitter.im\/spring-projects\/spring-boot"},{"key":"e_1_3_1_44_2","unstructured":"Gitter. 2014. Gitter developer page. Retrieved from https:\/\/developer.gitter.im\/"},{"key":"e_1_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2015.12"},{"key":"e_1_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSM.2015.7332474"},{"key":"e_1_3_1_47_2","volume-title":"Card Sorting: Designing Usable Categories","author":"Spencer Donna","year":"2009","unstructured":"Donna Spencer. 2009. Card Sorting: Designing Usable Categories. Rosenfeld Media."},{"key":"e_1_3_1_48_2","doi-asserted-by":"publisher","DOI":"10.1177\/001316446002000104"},{"key":"e_1_3_1_49_2","article-title":"GBM: A highly efficient gradient boosting decision tree","volume":"30","author":"Ke Guolin","year":"2017","unstructured":"Guolin Ke, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. GBM: A highly efficient gradient boosting decision tree. Adv. Neural Inf. Process. Syst. 30 (2017).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"e_1_3_1_50_2","article-title":"FastText. zip: Compressing text classification models","author":"Joulin Armand","year":"2016","unstructured":"Armand Joulin, Edouard Grave, Piotr Bojanowski, Matthijs Douze, H\u00e9rve J\u00e9gou, and Tomas Mikolov. 2016. FastText. zip: Compressing text classification models. arXiv preprint arXiv:1612.03651 (2016).","journal-title":"arXiv preprint arXiv:1612.03651"},{"key":"e_1_3_1_51_2","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2018.2876340"},{"key":"e_1_3_1_52_2","unstructured":"Allen Institute for Artificial Intelligence. 2023. AllenNLP. Retrieved from https:\/\/allennlp.org\/"},{"key":"e_1_3_1_53_2","unstructured":"Facebook. 2023. PyTorch. Retrieved from https:\/\/pytorch.org\/"},{"key":"e_1_3_1_54_2","unstructured":"Hugging Face. 2020. Transformers. Retrieved from https:\/\/huggingface.co\/"},{"key":"e_1_3_1_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2013.6606589"},{"key":"e_1_3_1_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2013.6693087"},{"key":"e_1_3_1_57_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2013.6606584"},{"key":"e_1_3_1_58_2","doi-asserted-by":"publisher","DOI":"10.1007\/s10515-010-0069-5"},{"key":"e_1_3_1_59_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSM.2009.5306333"},{"key":"e_1_3_1_60_2","doi-asserted-by":"publisher","DOI":"10.1109\/MSR.2017.43"},{"key":"e_1_3_1_61_2","article-title":"A comparison of synthetic oversampling methods for multi-class text classification","author":"Glazkova Anna","year":"2020","unstructured":"Anna Glazkova. 2020. A comparison of synthetic oversampling methods for multi-class text classification. arXiv preprint arXiv:2008.04636 (2020).","journal-title":"arXiv preprint arXiv:2008.04636"},{"key":"e_1_3_1_62_2","article-title":"EDA: Easy data augmentation techniques for boosting performance on text classification tasks","author":"Wei Jason","year":"2019","unstructured":"Jason Wei and Kai Zou. 2019. EDA: Easy data augmentation techniques for boosting performance on text classification tasks. arXiv preprint arXiv:1901.11196 (2019).","journal-title":"arXiv preprint arXiv:1901.11196"},{"key":"e_1_3_1_63_2","article-title":"Text data augmentation made simple by leveraging NLP cloud APIs","author":"Coulombe Claude","year":"2018","unstructured":"Claude Coulombe. 2018. Text data augmentation made simple by leveraging NLP cloud APIs. arXiv preprint arXiv:1812.04718 (2018).","journal-title":"arXiv preprint arXiv:1812.04718"},{"key":"e_1_3_1_64_2","doi-asserted-by":"publisher","DOI":"10.1613\/jair.953"},{"key":"e_1_3_1_65_2","doi-asserted-by":"publisher","DOI":"10.1002\/smr.2362"}],"container-title":["ACM Transactions on Software Engineering and Methodology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643677","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643677","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:05:33Z","timestamp":1750291533000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643677"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,4]]},"references-count":64,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,6,30]]}},"alternative-id":["10.1145\/3643677"],"URL":"https:\/\/doi.org\/10.1145\/3643677","relation":{},"ISSN":["1049-331X","1557-7392"],"issn-type":[{"value":"1049-331X","type":"print"},{"value":"1557-7392","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6,4]]},"assertion":[{"value":"2023-08-13","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-01-15","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-06-04","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}