{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T15:05:36Z","timestamp":1767625536053,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,3]],"date-time":"2024-07-03T00:00:00Z","timestamp":1719964800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,3]]},"DOI":"10.1145\/3649217.3653622","type":"proceedings-article","created":{"date-parts":[[2024,7,3]],"date-time":"2024-07-03T18:30:20Z","timestamp":1720031420000},"page":"360-366","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Semantic Similarity Search for Source Code Plagiarism Detection: An Exploratory Study"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3194-8414","authenticated-orcid":false,"given":"Fahad","family":"Ebrahim","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of Warwick, Coventry, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9826-5928","authenticated-orcid":false,"given":"Mike","family":"Joy","sequence":"additional","affiliation":[{"name":"Department of Computer Science, The University of Warwick, Coventry, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2024,7,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.211"},{"key":"e_1_3_2_1_2_1","first-page":"1","article-title":"Plagiarism in programming assessments: a systematic review","volume":"20","author":"Albluwi Ibrahim","year":"2019","unstructured":"Ibrahim Albluwi. 2019. Plagiarism in programming assessments: a systematic review. ACM Transactions on Computing Education (TOCE), Vol. 20, 1 (2019), 1--28.","journal-title":"ACM Transactions on Computing Education (TOCE)"},{"key":"e_1_3_2_1_3_1","volume-title":"Material Survey on Source Code Plagiarism Detection in Programming Courses. In 2022 International Conference on Advanced Learning Technologies (ICALT). IEEE, 387--389","author":"Alexandru Olteanu Alexandra-Cristina","year":"2022","unstructured":"C^impeanu Alexandra-Cristina and SL Alexandru Olteanu. 2022. Material Survey on Source Code Plagiarism Detection in Programming Courses. In 2022 International Conference on Advanced Learning Technologies (ICALT). IEEE, 387--389."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/FIE49875.2021.9637155"},{"key":"e_1_3_2_1_5_1","volume-title":"A literature study of embeddings on source code. arXiv preprint arXiv:1904.03061","author":"Chen Zimin","year":"2019","unstructured":"Zimin Chen and Martin Monperrus. 2019. A literature study of embeddings on source code. arXiv preprint arXiv:1904.03061 (2019)."},{"volume-title":"Natural language processing. Fundamentals of artificial intelligence","year":"2020","key":"e_1_3_2_1_6_1","unstructured":"KR1442 Chowdhary. 2020. Natural language processing. Fundamentals of artificial intelligence (2020), 603--649."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1351324916000334"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TE.2007.906776"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19--1423"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-22327-3_31"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the Forum for Information Retrieval Evaluation. 21--30","author":"Flores Enrique","year":"2014","unstructured":"Enrique Flores, Paolo Rosso, Lidia Moreno, and Esa\u00fa Villatoro-Tello. 2014. On the detection of source code re-use. In Proceedings of the Forum for Information Retrieval Evaluation. 21--30."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the Forum for Information Retrieval Evaluation. 39--42","author":"Ganguly Debasis","year":"2014","unstructured":"Debasis Ganguly and Gareth JF Jones. 2014. DCU@ FIRE-2014: an information retrieval approach for source code plagiarism detection. In Proceedings of the Forum for Information Retrieval Evaluation. 39--42."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-017-9313-y"},{"key":"e_1_3_2_1_15_1","unstructured":"Tianyu Gao Xingcheng Yao and Danqi Chen. 2021. SimCSE: Simple Contrastive Learning of Sentence Embeddings. In Empirical Methods in Natural Language Processing (EMNLP)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2203.03850"},{"key":"e_1_3_2_1_17_1","volume-title":"Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366","author":"Guo Daya","year":"2020","unstructured":"Daya Guo, Shuo Ren, Shuai Lu, Zhangyin Feng, Duyu Tang, Shujie Liu, Long Zhou, Nan Duan, Alexey Svyatkovskiy, Shengyu Fu, et al. 2020. Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366 (2020)."},{"key":"e_1_3_2_1_18_1","unstructured":"Alexander Hermans Lucas Beyer and Bastian Leibe. 2017. In defense of the triplet loss for person re-identification. arXiv preprint arXiv:1703.07737 (2017)."},{"key":"e_1_3_2_1_19_1","volume-title":"Codesearchnet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436","author":"Husain Hamel","year":"2019","unstructured":"Hamel Husain, Ho-Hsiang Wu, Tiferet Gazit, Miltiadis Allamanis, and Marc Brockschmidt. 2019. Codesearchnet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436 (2019)."},{"key":"e_1_3_2_1_20_1","volume-title":"zip: Compressing text classification models. arXiv preprint arXiv:1612.03651","author":"Joulin Armand","year":"2016","unstructured":"Armand Joulin, Edouard Grave, Piotr Bojanowski, Matthijs Douze, H\u00e9rve J\u00e9gou, and Tomas Mikolov. 2016. Fasttext. zip: Compressing text classification models. arXiv preprint arXiv:1612.03651 (2016)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.15388\/infedu.2019.15"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TALE48000.2019.9225953"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"e_1_3_2_1_24_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1111\/jcal.12662"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.3390\/app12178805"},{"key":"e_1_3_2_1_27_1","volume-title":"Deep learning meets software engineering: A survey on pre-trained models of source code. arXiv preprint arXiv:2205.11739","author":"Niu Changan","year":"2022","unstructured":"Changan Niu, Chuanyi Li, Bin Luo, and Vincent Ng. 2022. Deep learning meets software engineering: A survey on pre-trained models of source code. arXiv preprint arXiv:2205.11739 (2022)."},{"key":"e_1_3_2_1_28_1","volume-title":"An Empirical Comparison of Pre-Trained Models of Source Code. arXiv preprint arXiv:2302.04026","author":"Niu Changan","year":"2023","unstructured":"Changan Niu, Chuanyi Li, Vincent Ng, Dongxiao Chen, Jidong Ge, and Bin Luo. 2023. An Empirical Comparison of Pre-Trained Models of Source Code. arXiv preprint arXiv:2302.04026 (2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"Comparing the Pretrained Models of Source Code by Re-pretraining Under a Unified Setup","author":"Niu Changan","year":"2023","unstructured":"Changan Niu, Chuanyi Li, Vincent Ng, and Bin Luo. 2023. Comparing the Pretrained Models of Source Code by Re-pretraining Under a Unified Setup. IEEE Transactions on Neural Networks and Learning Systems (2023)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313290"},{"key":"e_1_3_2_1_31_1","volume-title":"Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748","author":"van den Oord Aaron","year":"2018","unstructured":"Aaron van den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)."},{"key":"e_1_3_2_1_32_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. https:\/\/doi.org\/10.48550\/ARXIV.2303.08774"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_34_1","first-page":"1016","article-title":"Finding plagiarisms among a set of programs with JPlag","volume":"8","author":"Prechelt Lutz","year":"2002","unstructured":"Lutz Prechelt, Guido Malpohl, Michael Philippsen, et al. 2002. Finding plagiarisms among a set of programs with JPlag. Journal of Universal Computer Science, Vol. 8, 11 (2002), 1016.","journal-title":"Journal of Universal Computer Science"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11431-020-1647-3"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1908.10084"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1049\/sfw2.12064"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/872757.872770"},{"key":"e_1_3_2_1_40_1","volume-title":"Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859","author":"Wang Yue","year":"2021","unstructured":"Yue Wang, Weishi Wang, Shafiq Joty, and Steven CH Hoi. 2021. Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859 (2021)."},{"key":"e_1_3_2_1_41_1","first-page":"1","article-title":"String similarity via greedy string tiling and running Karp-Rabin matching","volume":"119","author":"Wise Michael J","year":"1993","unstructured":"Michael J Wise. 1993. String similarity via greedy string tiling and running Karp-Rabin matching. Online Preprint, Dec, Vol. 119, 1 (1993), 1--17.","journal-title":"Online Preprint"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","unstructured":"Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz et al. 2019. Huggingface's transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019). https:\/\/doi.org\/10.48550\/arXiv.1910.03771","DOI":"10.48550\/arXiv.1910.03771"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.3390\/e25060888"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3520312.3534862"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2023.111796"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3533767.3534390"}],"event":{"name":"ITiCSE 2024: Innovation and Technology in Computer Science Education","sponsor":["SIGCSE ACM Special Interest Group on Computer Science Education"],"location":"Milan Italy","acronym":"ITiCSE 2024"},"container-title":["Proceedings of the 2024 on Innovation and Technology in Computer Science Education V. 1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649217.3653622","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649217.3653622","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T14:46:57Z","timestamp":1755787617000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649217.3653622"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,3]]},"references-count":46,"alternative-id":["10.1145\/3649217.3653622","10.1145\/3649217"],"URL":"https:\/\/doi.org\/10.1145\/3649217.3653622","relation":{},"subject":[],"published":{"date-parts":[[2024,7,3]]},"assertion":[{"value":"2024-07-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}