{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T06:39:05Z","timestamp":1763102345829,"version":"3.45.0"},"reference-count":60,"publisher":"Tech Science Press","issue":"1","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":88,"URL":"https:\/\/doi.org\/10.32604\/TSP-CROSSMARKPOLICY"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.057792","type":"journal-article","created":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T03:27:22Z","timestamp":1740626842000},"page":"435-453","update-policy":"https:\/\/doi.org\/10.32604\/tsp-crossmarkpolicy","source":"Crossref","is-referenced-by-count":0,"title":["Amalgamation of Classical and Large Language Models for Duplicate Bug Detection: A Comparative Study"],"prefix":"10.32604","volume":"83","author":[{"given":"Sai Venkata Akhil","family":"Ammu","sequence":"first","affiliation":[]},{"given":"Sukhjit Singh","family":"Sehra","sequence":"additional","affiliation":[]},{"given":"Sumeet Kaur","family":"Sehra","sequence":"additional","affiliation":[]},{"given":"Jaiteg","family":"Singh","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","first-page":"470","volume":"12109","author":"Sehra","year":"2020","journal-title":"Advances in artificial intelligence. Canadian AI. Lecture notes in computer science"},{"key":"ref2","series-title":"2023 IEEE\/ACM International Conference on Automation of Software Test","first-page":"1","article-title":"An intelligent duplicate bug report detection method based on technical term extraction","author":"Wu","year":"2023 May"},{"key":"ref3","series-title":"Proceedings of the 32nd ACM\/IEEEInternational Conference on Software Engineering","first-page":"45","article-title":"A discriminative model approach for accurate duplicate bug report retrieval","author":"Sun","year":"2010 May"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3603109","article-title":"Duplicate bug report detection: how far are we?","volume":"32","author":"Zhang","year":"2023 Jul","journal-title":"ACM Trans Softw Eng Methodol"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"1960","DOI":"10.1007\/s10664-015-9404-6","article-title":"Studying the needed effort for identifying duplicate issues","volume":"21","author":"Rakha","year":"2016 Oct","journal-title":"Empir Softw Eng"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"111607","DOI":"10.1016\/j.jss.2023.111607","article-title":"Does deep learning improve the performance of duplicate bug report detection? an empirical study?","volume":"198","author":"Jiang","year":"2023 Apr","journal-title":"J Syst Softw"},{"key":"ref7","series-title":"Proceedings of IEEE\/ACM International Conference on Automated Software Engineering","first-page":"253","article-title":"Towards more accurate retrieval of duplicate bug reports","author":"Sun","year":"2011 Nov"},{"key":"ref8","series-title":"Proceedings of the 11th Working Conference on Mining Software Repositories","first-page":"324","article-title":"New features for duplicate bug detection","author":"Klein","year":"2014"},{"key":"ref9","series-title":"Proceedings of the 27th IEEE\/ACM International Conference on Automated Software Engineering","first-page":"70","article-title":"Duplicate bug report detection with a combination of information retrieval and topic modeling","author":"Nguyen","year":"2012 Sep"},{"key":"ref10","series-title":"Proceedings of the 30th International Conference on Software Engineering","first-page":"461","article-title":"An approach to detecting duplicate bug reports using natural language and execution information","author":"Wang","year":"2008 May"},{"key":"ref11","series-title":"Proceedings of the 30th International Conference on Software Engineering","first-page":"52","article-title":"Automated duplicate detection for bug tracking systems","author":"Jalbert","year":"2008 Jun"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"1762","DOI":"10.1587\/transinf.2016EDP7052","article-title":"Automated duplicate bug report detection using multi-factor analysis","volume":"E99.D","author":"Zou","year":"2016 Jul","journal-title":"IEICE Trans Inf Syst"},{"key":"ref13","series-title":"2016 IEEE 27th International Symposium on Software Reliability Engineering","first-page":"127","article-title":"Combining word embedding with information retrieval to recommend similar bug reports","author":"Yang","year":"2016 Oct"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"869","DOI":"10.1142\/S0218194017500322","article-title":"PRST: a pagerank-based summarization technique for summarizing bug reports with duplicates","volume":"27","author":"Jiang","year":"2017","journal-title":"Int J Softw Eng Knowl Eng"},{"key":"ref15","series-title":"2012 16th European Conference on Software Maintenance and Reengineering (CSMR\u201903)","first-page":"385","article-title":"Improved duplicate bug report identification","author":"Tian","year":"2012"},{"key":"ref16","series-title":"2013 10th Working Conference on Mining Software Repositories (MSR 2013)","first-page":"183","article-title":"A contextual approach towards more accurate duplicate bug report detection","author":"Alipour","year":"2013 May"},{"key":"ref17","series-title":"Proceedings of the 11th Working Conference on Mining Software Repositories","first-page":"308","article-title":"Improving the accuracy of duplicate bug report detection using textual similarity measures","author":"Lazar","year":"2014"},{"key":"ref18","series-title":"Proceedings of the 40th International Conference on Software Engineering: Companion","first-page":"193","article-title":"DWEN: deep word embedding network for duplicate bug report detection in software repositories","author":"Budhiraja","year":"2018 May"},{"key":"ref19","doi-asserted-by":"crossref","unstructured":"Zhang T, Irsan IC, Thung F, Lo D. Cupid: leveraging ChatGPT for more accurate duplicate bug report detection. 2023. doi:10.48550\/arXiv.2308.10022.","DOI":"10.1145\/3576042"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"200749","DOI":"10.1109\/ACCESS.2020.3033045","article-title":"Duplicate bug report detection and classification system based on deep learning technique","volume":"8","author":"Kukkar","year":"2020 Oct","journal-title":"IEEE Access"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"1233","DOI":"10.32604\/csse.2023.025991","article-title":"Adaptive deep learning model for software bug detection and classification","volume":"45","author":"Sivapurnima","year":"2023","journal-title":"Comput Syst Sci Eng"},{"key":"ref22","first-page":"578","article-title":"A systematic study of duplicate bug report detection","volume":"12","author":"Gupta","year":"2021 Jan","journal-title":"Int J Adv Comput Sci Appl"},{"key":"ref23","series-title":"IEEE\/ACM 28th International Conference on Program Comprehension","first-page":"117","article-title":"Duplicate bug report detection using dual-channel convolutional neural networks","author":"He","year":"2020 Oct"},{"key":"ref24","series-title":"2020 6th International Conference on Web Research","first-page":"288","article-title":"Automatic duplicate bug report detection using information retrieval-based versus machine learning based approaches","author":"Neysiani","year":"2020 Apr"},{"key":"ref25","series-title":"2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE)","first-page":"602","article-title":"RepresentThemAll: a universal learning representation of bug reports","author":"Fang","year":"2023 May"},{"key":"ref26","series-title":"Proceedings of the 2017 IEEE\/ACM 14th International Conference on Mining Software Repositories","first-page":"527","article-title":"Rediscovery datasets: connecting duplicate reports","author":"Sadat","year":"2017 May"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"123924","DOI":"10.1109\/ACCESS.2023.3329732","article-title":"An extended survey concerning the significance of artificial intelligence and machine learning techniques for bug triage and management","volume":"11","author":"Bocu","year":"2023","journal-title":"IEEE Access"},{"key":"ref28","doi-asserted-by":"crossref","first-page":"111258","DOI":"10.1016\/j.knosys.2023.111258","article-title":"Duplicate bug report detection using named entity recognition","volume":"284","author":"Zheng","year":"2024","journal-title":"Knowl-Based Syst"},{"key":"ref29","series-title":"2023 IEEE International Conference on Software Analysis, Evolution and Reengineering","first-page":"25","article-title":"Towards understanding the impacts of textual dissimilarity on duplicate bug report detection","author":"Jahan","year":"2023 Mar"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"S275","DOI":"10.1007\/s13198-023-01855-x","article-title":"DENATURE: duplicate detection and type identification in open source bug repositories","volume":"14","author":"Chauhan","year":"Mar. 2023","journal-title":"Int J Syst Assur Eng Manag"},{"key":"ref31","first-page":"108318","article-title":"Sentence embedding and fine-tuning to automatically identify duplicate bugs","volume":"4","author":"Isotani","year":"2022","journal-title":"Front Comput Sci"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"111617","DOI":"10.1016\/j.jss.2023.111617","article-title":"Leveraging multi-level embeddings for knowledge-aware bug report reformulation","volume":"198","author":"Zhou","year":"2023","journal-title":"J Syst Softw"},{"key":"ref33","doi-asserted-by":"crossref","first-page":"100048","DOI":"10.1016\/j.nlp.2023.100048","article-title":"A survey of GPT-3 family large language models including ChatGPT and GPT-4","volume":"6","author":"Kalyan","year":"2024","journal-title":"Nat Lang Process"},{"key":"ref34","unstructured":"Minaee S, Mikolov T, Nikzad N, Chenaghlu M, Socher R, Amatriain X, et al. Large language models: a survey. 2024 Feb. doi:10.48550\/arXiv.2402.06196."},{"key":"ref35","unstructured":"Zhao WX, Zhou K, Li J, Tang T, Wang X, Hou Y, et al. A survey of large language models. 2023 Nov. doi:10.48550\/arXiv.2303.18223."},{"key":"ref36","series-title":"Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing","first-page":"1051","article-title":"Large language models can self-improve","author":"Huang","year":"2023 Dec"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TKDE.2024.3352100","article-title":"Unifying large language models and knowledge graphs: a roadmap","volume":"36","author":"Pan","year":"2024","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"ref38","doi-asserted-by":"crossref","first-page":"1166120","DOI":"10.3389\/fpubh.2023.1166120","article-title":"ChatGPT and the rise of large language models: the new AI-driven infodemic threat in public health","volume":"11","author":"De Angelis","year":"2023","journal-title":"Front Public Health"},{"key":"ref39","article-title":"LLaMA: open and efficient foundation language models","author":"Touvron","year":"2023 Feb","journal-title":"Tech Rep"},{"key":"ref40","series-title":"2009 IEEE International Advance Computing Conference","first-page":"1388","article-title":"Bug mining model based on event-component similarity to discover similar and duplicate GUI bugs","author":"Nagwani","year":"2009 Mar"},{"key":"ref41","series-title":"17th Asia Pacific Software Engineering Conference (APSEC 2010)","first-page":"366","article-title":"Detecting duplicate bug report using character n-gram-based features","author":"Sureka","year":"2010 Dec"},{"key":"ref42","unstructured":"Mikolov T. Efficient estimation of word representations in vector space. arXiv:1301.3781. 2013;3781."},{"key":"ref43","series-title":"World Wide Web Conference (WWW '19)","first-page":"2587","article-title":"Global vectors for node representations","author":"Brochier","year":"2019 May"},{"key":"ref44","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K. Bert: pre-training of deep bidirectional transformers for language understanding. 2018 Oct. doi:10.48550\/arXiv.1810.04805."},{"key":"ref45","unstructured":"Liu Y, Ott M, Goyal N, Du J, Joshi M, Chen D, et al. RoBERTa: a robustly optimized BERT pretraining approach. 2019. doi:10.48550\/arXiv.1907.11692."},{"key":"ref46","series-title":"Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","first-page":"296","article-title":"Augmented SBERT: data augmentation method for improving bi-encoders for pairwise sentence scoring tasks","author":"Thakur","year":"2021 Jun"},{"key":"ref47","doi-asserted-by":"crossref","unstructured":"Fournier L, Dupoux E, Dunbar E. Analogies minus analogy test: measuring regularities in word embeddings. arXiv:2010.03446. 2020","DOI":"10.18653\/v1\/2020.conll-1.29"},{"key":"ref48","series-title":"5th International Conference on Web Research","first-page":"178","article-title":"New methodology for contextual features usage in duplicate bug reports detection","author":"Neysiani","year":"2019 Apr"},{"key":"ref49","series-title":"2020 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","first-page":"1622","article-title":"Fast detection of duplicate bug reports using LDA-based topic modeling and classification","author":"Akilan","year":"2020 Oct"},{"key":"ref50","unstructured":"Face H. Massive text embedding benchmark (MTEB); 2024 [cited 2024 Nov 20]. Available from: https:\/\/huggingface.co\/spaces\/mteb."},{"key":"ref51","unstructured":"Sanh V, Debut L, Chaumond J, Wolf T. Distilbert, a distilled version of BERT: smaller, faster, cheaper and lighter. 2019 Oct. doi:10.48550\/arXiv.1910.01108."},{"key":"ref52","doi-asserted-by":"crossref","unstructured":"Wolf T, Debut L, Sanh V, Chaumond J, Delangue C, Moi A, et al. Huggingface\u2019s transformers: state-of-the-art natural language processing. 2019 Oct. doi:10.48550\/arXiv.1910.03771.","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"ref53","series-title":"Thirtieth Brazilian Symposium on Software Engineering (SBES 2016)","first-page":"43","article-title":"Characterizing bug workflows in Mozilla Firefox","author":"Rocha","year":"2016 Sep"},{"key":"ref54","doi-asserted-by":"crossref","first-page":"545","DOI":"10.11144\/Javeriana.upsy10-2.cdcp","article-title":"Cliff\u2019s delta calculator: a non-parametric effect size program for two groups of observations","volume":"10","author":"Macbeth","year":"2011","journal-title":"Univ Psychol"},{"key":"ref55","doi-asserted-by":"crossref","first-page":"846","DOI":"10.1109\/TR.2022.3193645","article-title":"Duplicate bug report detection using an attention-based neural language model","volume":"72","author":"Messaoud","year":"2023","journal-title":"IEEE Trans Rel"},{"key":"ref56","unstructured":"Freestone M, Santu SKK. Word embeddings revisited: do LLMs offer something new?. 2024 Feb. doi:10.48550\/arXiv.2402.11094."},{"key":"ref57","unstructured":"Ye J, Xu N, Wang Y, Zhou J, Zhang Q, Gui T, et al. Data augmentation via large language models for few-shot named entity recognition. 2024 Feb. doi:10.48550\/arXiv.2402.14568."},{"key":"ref58","doi-asserted-by":"crossref","unstructured":"Li Y, Zhang R, Liu J. An enhanced prompt-based LLM reasoning scheme via knowledge graph-integrated collaboration. 2024 Feb. doi:10.48550\/arXiv.2402.14568.","DOI":"10.1007\/978-3-031-72344-5_17"},{"key":"ref59","series-title":"Proceedings of the Fifteenth International Conference on Predictive Models and Data Analytics in Software Engineering (PROMISE'19)","first-page":"16","article-title":"On usefulness of the deep-learning-based bug localization models to practitioners","author":"Polisetty","year":"2019 Sep"},{"key":"ref60","doi-asserted-by":"crossref","first-page":"8788","DOI":"10.3390\/app13158788","article-title":"A survey on bug deduplication and triage methods from multiple points of view","volume":"13","author":"Qian","year":"2023","journal-title":"Appl Sci"}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-83-1\/TSP_CMC_57792\/TSP_CMC_57792.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T06:34:08Z","timestamp":1763102048000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v83n1\/60067"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":60,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.057792","relation":{},"ISSN":["1546-2226"],"issn-type":[{"type":"electronic","value":"1546-2226"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2024-08-27","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-01-13","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-03-26","order":2,"name":"published","label":"Published Online","group":{"name":"publication_history","label":"Publication History"}}]}}