{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T16:37:03Z","timestamp":1772642223081,"version":"3.50.1"},"reference-count":94,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100017596","name":"Natural Science Basic Research Program of Shaanxi Province","doi-asserted-by":"publisher","award":["2018JQ6078"],"award-info":[{"award-number":["2018JQ6078"]}],"id":[{"id":"10.13039\/501100017596","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100017596","name":"Natural Science Basic Research Program of Shaanxi Province","doi-asserted-by":"publisher","award":["2022JM-342"],"award-info":[{"award-number":["2022JM-342"]}],"id":[{"id":"10.13039\/501100017596","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012270","name":"Shaanxi Key Science and Technology Innovation Team Project","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012270","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008780","name":"Xi'an University of Posts and Telecommunications","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100008780","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272387"],"award-info":[{"award-number":["62272387"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61702414"],"award-info":[{"award-number":["61702414"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Information and Software Technology"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1016\/j.infsof.2025.107766","type":"journal-article","created":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T19:09:46Z","timestamp":1747163386000},"page":"107766","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":3,"special_numbering":"C","title":["SolBERT: Advancing solidity smart contract similarity analysis via self-supervised pre-training and contrastive fine-tuning"],"prefix":"10.1016","volume":"184","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7608-8908","authenticated-orcid":false,"given":"Zhenzhou","family":"Tian","sequence":"first","affiliation":[]},{"given":"Yudong","family":"Teng","sequence":"additional","affiliation":[]},{"given":"Xianqun","family":"Ke","sequence":"additional","affiliation":[]},{"given":"Yanping","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1550-6170","authenticated-orcid":false,"given":"Lingwei","family":"Chen","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.infsof.2025.107766_b1","series-title":"Ethereum: A secure decentralised generalised transaction ledger","first-page":"1","author":"Wood","year":"2014"},{"issue":"1","key":"10.1016\/j.infsof.2025.107766_b2","doi-asserted-by":"crossref","first-page":"327","DOI":"10.1109\/TSE.2020.2989002","article-title":"Defining smart contract defects on ethereum","volume":"48","author":"Chen","year":"2022","journal-title":"IEEE Trans. Softw. Eng."},{"issue":"6","key":"10.1016\/j.infsof.2025.107766_b3","doi-asserted-by":"crossref","first-page":"4617","DOI":"10.1007\/s10664-020-09852-5","article-title":"Code cloning in smart contracts: a case study on verified contracts from the ethereum blockchain platform","volume":"25","author":"Kondo","year":"2020","journal-title":"Empir. Softw. Eng."},{"key":"10.1016\/j.infsof.2025.107766_b4","series-title":"2019 IEEE\/ACM 27th International Conference on Program Comprehension","first-page":"105","article-title":"Enabling clone detection for ethereum via smart contract birthmarks","author":"Liu","year":"2019"},{"issue":"12","key":"10.1016\/j.infsof.2025.107766_b5","doi-asserted-by":"crossref","first-page":"2874","DOI":"10.1109\/TSE.2020.2971482","article-title":"Checking smart contracts with structural code embedding","volume":"47","author":"Gao","year":"2021","journal-title":"IEEE Trans. Softw. Eng."},{"key":"10.1016\/j.infsof.2025.107766_b6","series-title":"Proceedings of the 29th International Conference on Software Engineering","first-page":"96","article-title":"DECKARD: Scalable and accurate tree-based detection of code clones","author":"Jiang","year":"2007"},{"key":"10.1016\/j.infsof.2025.107766_b7","series-title":"Financial Cryptography and Data Security - 24th International Conference, FC 2020, Kota Kinabalu, Malaysia, February 10-14, 2020 Revised Selected Papers","first-page":"654","article-title":"Characterizing code clones in the ethereum smart contract ecosystem","volume":"vol. 12059","author":"He","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b8","series-title":"2019 IEEE\/ACM 27th International Conference on Program Comprehension","first-page":"260","article-title":"Recommending differentiated code to support smart contract update","author":"Huang","year":"2019"},{"key":"10.1016\/j.infsof.2025.107766_b9","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1162\/tacl_a_00051","article-title":"Enriching word vectors with subword information","volume":"5","author":"Bojanowski","year":"2017","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"10.1016\/j.infsof.2025.107766_b10","series-title":"Proceedings of the 27th International Conference on Neural Information Processing Systems - Volume 2","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"Mikolov","year":"2013"},{"key":"10.1016\/j.infsof.2025.107766_b11","series-title":"Advances in Information Retrieval: 45th European Conference on Information Retrieval, ECIR 2023, Dublin, Ireland, April 2\u20136, 2023, Proceedings, Part II","first-page":"439","article-title":"Leveraging comment retrieval for code summarization","author":"Hou","year":"2023"},{"issue":"POPL","key":"10.1016\/j.infsof.2025.107766_b12","doi-asserted-by":"crossref","DOI":"10.1145\/3290353","article-title":"Code2vec: learning distributed representations of code","volume":"3","author":"Alon","year":"2019","journal-title":"Proc. ACM Program. Lang."},{"key":"10.1016\/j.infsof.2025.107766_b13","series-title":"2021 IEEE\/ACM 43rd International Conference on Software Engineering","first-page":"1186","article-title":"InferCode: Self-supervised learning of code representations by predicting subtrees","author":"Bui","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b14","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"North Am. Chapter of the Comput. Linguist."},{"key":"10.1016\/j.infsof.2025.107766_b15","series-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","year":"2019"},{"key":"10.1016\/j.infsof.2025.107766_b16","series-title":"Findings of the Association for Computational Linguistics: EMNLP 2020","first-page":"1536","article-title":"CodeBERT: A pre-trained model for programming and natural languages","author":"Feng","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b17","series-title":"The solidity contract-oriented programming language","year":"2023"},{"key":"10.1016\/j.infsof.2025.107766_b18","series-title":"FISCO BCOS.","year":"2022"},{"key":"10.1016\/j.infsof.2025.107766_b19","series-title":"EVM-compatible smart contracts platform.","year":"2022"},{"key":"10.1016\/j.infsof.2025.107766_b20","series-title":"1st International Conference on Learning Representations, ICLR 2013, Scottsdale, Arizona, USA, May 2-4, 2013, Workshop Track Proceedings","article-title":"Efficient estimation of word representations in vector space","author":"Mikolov","year":"2013"},{"key":"10.1016\/j.infsof.2025.107766_b21","series-title":"Proceedings of the 31st International Conference on International Conference on Machine Learning - Volume 32","first-page":"II","article-title":"Distributed representations of sentences and documents","author":"Le","year":"2014"},{"issue":"1","key":"10.1016\/j.infsof.2025.107766_b22","doi-asserted-by":"crossref","first-page":"450","DOI":"10.1002\/int.22633","article-title":"Landscape estimation of solidity version usage on ethereum via version identification","volume":"37","author":"Tian","year":"2022","journal-title":"Int. J. Intell. Syst."},{"key":"10.1016\/j.infsof.2025.107766_b23","series-title":"2019 IEEE Symposium on Security and Privacy","first-page":"472","article-title":"Asm2Vec: Boosting static representation robustness for binary clone search against code obfuscation and compiler optimization","author":"Ding","year":"2019"},{"key":"10.1016\/j.infsof.2025.107766_b24","first-page":"1877","article-title":"Language models are few-shot learners","volume":"vol. 33","author":"Brown","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b25","series-title":"GPT-4 technical report","author":"OpenAI","year":"2023"},{"issue":"1","key":"10.1016\/j.infsof.2025.107766_b26","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.infsof.2025.107766_b27","series-title":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing","first-page":"8696","article-title":"CodeT5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation","author":"Wang","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b28","series-title":"Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security","first-page":"3236","article-title":"PalmTree: Learning an assembly language model for instruction embedding","author":"Li","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b29","series-title":"Advances in Neural Information Processing Systems","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.infsof.2025.107766_b30","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1016\/j.aiopen.2022.10.001","article-title":"A survey of transformers","volume":"3","author":"Lin","year":"2022","journal-title":"AI Open"},{"key":"10.1016\/j.infsof.2025.107766_b31","series-title":"Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence","first-page":"1606","article-title":"Learning unified features from natural and programming languages for locating buggy source code","author":"Huo","year":"2016"},{"key":"10.1016\/j.infsof.2025.107766_b32","series-title":"2018 17th IEEE International Conference on Machine Learning and Applications","first-page":"757","article-title":"Automated vulnerability detection in source code using deep representation learning","author":"Russell","year":"2018"},{"key":"10.1016\/j.infsof.2025.107766_b33","series-title":"Proceedings of the 2017 11th Joint Meeting on Foundations of Software Engineering","first-page":"763","article-title":"Are deep neural networks the best choice for modeling source code?","author":"Hellendoorn","year":"2017"},{"issue":"3","key":"10.1016\/j.infsof.2025.107766_b34","doi-asserted-by":"crossref","first-page":"2179","DOI":"10.1007\/s10664-019-09730-9","article-title":"Deep code comment generation with hybrid lexical and syntactical information","volume":"25","author":"Hu","year":"2020","journal-title":"Empir. Softw. Engg."},{"key":"10.1016\/j.infsof.2025.107766_b35","series-title":"Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","first-page":"1571","article-title":"DeepCommenter: A deep code comment generation tool with hybrid lexical and syntactical information","author":"Li","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b36","series-title":"Proceedings of the ACM\/IEEE 42nd International Conference on Software Engineering","first-page":"1385","article-title":"Retrieval-based neural source code summarization","author":"Zhang","year":"2020"},{"issue":"16","key":"10.1016\/j.infsof.2025.107766_b37","first-page":"14015","article-title":"Code completion by modeling flattened abstract syntax trees as graphs","volume":"35","author":"Wang","year":"2021","journal-title":"Proc. the AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.infsof.2025.107766_b38","series-title":"Antlr.","year":"2022"},{"key":"10.1016\/j.infsof.2025.107766_b39","series-title":"Proceedings of the 44th International Conference on Software Engineering","first-page":"2006","article-title":"SPT-code: Sequence-to-sequence pre-training for learning source code representations","author":"Niu","year":"2022"},{"issue":"01","key":"10.1016\/j.infsof.2025.107766_b40","doi-asserted-by":"crossref","first-page":"308","DOI":"10.1109\/TDSC.2024.3397660","article-title":"BinBert: Binary code understanding with a fine-tunable and execution-aware transformer","volume":"22","author":"Artuso","year":"2025","journal-title":"IEEE Trans. Dependable Secur. Comput."},{"key":"10.1016\/j.infsof.2025.107766_b41","series-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"10.1016\/j.infsof.2025.107766_b42","doi-asserted-by":"crossref","first-page":"64","DOI":"10.1162\/tacl_a_00300","article-title":"SpanBERT: Improving pre-training by representing and predicting spans","volume":"8","author":"Joshi","year":"2020","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"10.1016\/j.infsof.2025.107766_b43","series-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems","article-title":"Xlnet: Generalized autoregressive pretraining for language understanding","author":"Yang","year":"2019"},{"key":"10.1016\/j.infsof.2025.107766_b44","series-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","year":"2019"},{"key":"10.1016\/j.infsof.2025.107766_b45","series-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","first-page":"3982","article-title":"Sentence-BERT: Sentence embeddings using siamese BERT-networks","author":"Reimers","year":"2019"},{"key":"10.1016\/j.infsof.2025.107766_b46","series-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"878","article-title":"Language-agnostic BERT sentence embedding","author":"Feng","year":"2022"},{"key":"10.1016\/j.infsof.2025.107766_b47","series-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing","first-page":"9119","article-title":"On the sentence embeddings from pre-trained language models","author":"Li","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b48","series-title":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing","first-page":"1442","article-title":"Fast, effective, and self-supervised: Transforming masked language models into universal lexical and sentence encoders","author":"Liu","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b49","series-title":"Proceedings of the 37th International Conference on Machine Learning","article-title":"A simple framework for contrastive learning of visual representations","author":"Chen","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b50","first-page":"18661","article-title":"Supervised contrastive learning","volume":"vol. 33","author":"Khosla","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b51","series-title":"What Makes for Good Views for Contrastive Learning?","author":"Tian","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b52","series-title":"Findings of the Association for Computational Linguistics: EMNLP 2021","first-page":"238","article-title":"WhiteningBERT: An easy unsupervised sentence embedding approach","author":"Huang","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b53","series-title":"Proceedings of the 30th IEEE\/ACM International Conference on Program Comprehension","first-page":"82","article-title":"Self-supervised learning of smart contract representations","author":"Yang","year":"2022"},{"key":"10.1016\/j.infsof.2025.107766_b54","series-title":"2017 IEEE Symposium on Security and Privacy","first-page":"595","article-title":"VUDDY: A scalable approach for vulnerable code clone discovery","author":"Kim","year":"2017"},{"issue":"C","key":"10.1016\/j.infsof.2025.107766_b55","article-title":"VDSimilar: Vulnerability detection based on code similarity of vulnerabilities and patches","volume":"110","author":"Sun","year":"2021","journal-title":"Comput. Secur."},{"key":"10.1016\/j.infsof.2025.107766_b56","series-title":"Proceedings of the 31st ACM SIGSOFT International Symposium on Software Testing and Analysis","first-page":"64","article-title":"Hunting bugs with accelerated optimal graph vertex matching","author":"Zhang","year":"2022"},{"key":"10.1016\/j.infsof.2025.107766_b57","series-title":"Proceedings of the 2016 ACM SIGSAC Conference on Computer and Communications Security","first-page":"480","article-title":"Scalable graph-based bug search for firmware images","author":"Feng","year":"2016"},{"key":"10.1016\/j.infsof.2025.107766_b58","series-title":"Proceedings of the 3rd ACM International Symposium on Blockchain and Secure Critical Infrastructure","first-page":"47","article-title":"Eth2Vec: Learning contract-wide code representations for vulnerability detection on ethereum smart contracts","author":"Ashizawa","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b59","series-title":"2017 26th International Conference on Computer Communication and Networks","first-page":"1","article-title":"Automated labeling of unknown contracts in ethereum","author":"Norvill","year":"2017"},{"key":"10.1016\/j.infsof.2025.107766_b60","series-title":"2020 IEEE 19th International Conference on Trust, Security and Privacy in Computing and Communications (TrustCom)","first-page":"1910","article-title":"Assessing the similarity of smart contracts by clustering their interfaces","author":"Di Angelo","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b61","series-title":"Artificial Neural Networks \u2013 ICANN 2009","first-page":"175","article-title":"On the use of the adjusted rand index as a metric for evaluating supervised classification","author":"Santos","year":"2009"},{"key":"10.1016\/j.infsof.2025.107766_b62","series-title":"International Conference on Learning Representations","article-title":"Code2seq: Generating sequences from structured representations of code","author":"Alon","year":"2019"},{"key":"10.1016\/j.infsof.2025.107766_b63","series-title":"Proceedings of the 27th International Conference on Neural Information Processing Systems - Volume 2","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"Sutskever","year":"2014"},{"key":"10.1016\/j.infsof.2025.107766_b64","series-title":"International Conference on Learning Representations","article-title":"GraphCodeBERT: Pre-training code representations with data flow","author":"Guo","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b65","series-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"7212","article-title":"UniXcoder: Unified cross-modal pre-training for code representation","author":"Guo","year":"2022"},{"key":"10.1016\/j.infsof.2025.107766_b66","series-title":"The 2023 Conference on Empirical Methods in Natural Language Processing","article-title":"CodeT5+: Open code large language models for code understanding and generation","author":"Wang","year":"2023"},{"issue":"1","key":"10.1016\/j.infsof.2025.107766_b67","doi-asserted-by":"crossref","DOI":"10.1016\/j.patter.2024.101118","article-title":"A survey of multilingual large language models","volume":"6","author":"Qin","year":"2025","journal-title":"Patterns"},{"key":"10.1016\/j.infsof.2025.107766_b68","series-title":"Proceedings of the Internet Measurement Conference 2018","first-page":"494","article-title":"Analyzing ethereum\u2019s contract topology","author":"Kiffer","year":"2018"},{"key":"10.1016\/j.infsof.2025.107766_b69","series-title":"2021 IEEE International Conference on Software Analysis, Evolution and Reengineering","first-page":"470","article-title":"Understanding code reuse in smart contracts","author":"Chen","year":"2021"},{"issue":"12","key":"10.1016\/j.infsof.2025.107766_b70","doi-asserted-by":"crossref","first-page":"1217","DOI":"10.1109\/TSE.2015.2454508","article-title":"Software plagiarism detection with birthmarks based on dynamic key instruction sequences","volume":"41","author":"Tian","year":"2015","journal-title":"IEEE Trans. Softw. Eng."},{"issue":"C","key":"10.1016\/j.infsof.2025.107766_b71","doi-asserted-by":"crossref","first-page":"136","DOI":"10.1016\/j.jss.2016.06.014","article-title":"Exploiting thread-related system calls for plagiarism detection of multithreaded programs","volume":"119","author":"Tian","year":"2016","journal-title":"J. Syst. Softw."},{"issue":"5","key":"10.1016\/j.infsof.2025.107766_b72","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1109\/TSE.2017.2688383","article-title":"Reviving sequential program birthmarking for multithreaded software plagiarism detection","volume":"44","author":"Tian","year":"2018","journal-title":"IEEE Trans. Softw. Eng."},{"key":"10.1016\/j.infsof.2025.107766_b73","series-title":"2020 IEEE 20th International Conference on Software Quality, Reliability and Security","first-page":"297","article-title":"Early detection of smart ponzi scheme contracts based on behavior forest similarity","author":"Sun","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b74","doi-asserted-by":"crossref","first-page":"49160","DOI":"10.1109\/ACCESS.2021.3069227","article-title":"Fine-grained compiler identification with sequence-oriented neural modeling","volume":"9","author":"Tian","year":"2021","journal-title":"IEEE Access"},{"issue":"6","key":"10.1016\/j.infsof.2025.107766_b75","doi-asserted-by":"crossref","DOI":"10.1145\/3597206","article-title":"Semantic-enriched code knowledge graph to reveal unknowns in smart contract code reuse","volume":"32","author":"Huang","year":"2023","journal-title":"ACM Trans. Softw. Eng. Methodol."},{"key":"10.1016\/j.infsof.2025.107766_b76","series-title":"Proceedings of the 35th IEEE\/ACM International Conference on Automated Software Engineering","first-page":"1400","article-title":"When deep learning meets smart contracts","author":"Gao","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b77","series-title":"2021 International Conference on Computer Information Science and Artificial Intelligence","first-page":"558","article-title":"Similarity measure for smart contract bytecode based on CFG feature extraction","author":"Zhu","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b78","series-title":"The 34th International Conference on Software Engineering and Knowledge Engineering, SEKE 2022, KSIR Virtual Conference Center, USA, July 1 - July 10, 2022","first-page":"513","article-title":"Ethereum smart contract representation learning for robust bytecode-level similarity detection","author":"Tian","year":"2022"},{"issue":"4","key":"10.1016\/j.infsof.2025.107766_b79","doi-asserted-by":"crossref","DOI":"10.3390\/electronics11040597","article-title":"Bytecode similarity detection of smart contract across optimization options and compiler versions based on triplet network","volume":"11","author":"Zhu","year":"2022","journal-title":"Electronics"},{"key":"10.1016\/j.infsof.2025.107766_b80","doi-asserted-by":"crossref","DOI":"10.1016\/j.infsof.2024.107517","article-title":"A vulnerability detection framework by focusing on critical execution paths","volume":"174","author":"Cheng","year":"2024","journal-title":"Inf. Softw. Technol."},{"key":"10.1016\/j.infsof.2025.107766_b81","series-title":"Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering","article-title":"SCVHunter: Smart contract vulnerability detection based on heterogeneous graph attention network","author":"Luo","year":"2024"},{"key":"10.1016\/j.infsof.2025.107766_b82","series-title":"2021 IEEE 32nd International Symposium on Software Reliability Engineering","first-page":"378","article-title":"Peculiar: Smart contract vulnerability detection based on crucial data flow graph and pre-training techniques","author":"Wu","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b83","series-title":"Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering","article-title":"Pre-training by predicting program dependencies for vulnerability analysis tasks","author":"Liu","year":"2024"},{"key":"10.1016\/j.infsof.2025.107766_b84","series-title":"Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering","article-title":"Reentrancy vulnerability detection and localization: A deep learning based two-phase approach","author":"Zhang","year":"2023"},{"key":"10.1016\/j.infsof.2025.107766_b85","doi-asserted-by":"crossref","DOI":"10.1016\/j.jss.2023.111919","article-title":"Fine-grained smart contract vulnerability detection by heterogeneous code feature learning and automated dataset construction","volume":"209","author":"Cai","year":"2024","journal-title":"J. Syst. Softw."},{"key":"10.1016\/j.infsof.2025.107766_b86","doi-asserted-by":"crossref","DOI":"10.1016\/j.infsof.2024.107405","article-title":"Automatic smart contract comment generation via large language models and in-context learning","volume":"168","author":"Zhao","year":"2024","journal-title":"Inf. Softw. Technol."},{"issue":"2","key":"10.1016\/j.infsof.2025.107766_b87","doi-asserted-by":"crossref","first-page":"50","DOI":"10.1007\/s10664-024-10602-0","article-title":"Towards an understanding of large language models in software engineering tasks","volume":"30","author":"Zheng","year":"2024","journal-title":"Empir. Softw. Eng."},{"key":"10.1016\/j.infsof.2025.107766_b88","first-page":"14967","article-title":"DOBF: A deobfuscation pre-training objective for programming languages","volume":"vol. 34","author":"Lachaux","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b89","series-title":"Unsupervised Translation of Programming Languages","author":"Roziere","year":"2020"},{"key":"10.1016\/j.infsof.2025.107766_b90","series-title":"Proceedings of the Thirty-Seventh Conference on Uncertainty in Artificial Intelligence","first-page":"54","article-title":"Treebert: A tree-based pre-trained model for programming language","volume":"161","author":"Jiang","year":"2021"},{"key":"10.1016\/j.infsof.2025.107766_b91","series-title":"Proceedings of the 45th International Conference on Software Engineering","first-page":"2425","article-title":"Automating code-related tasks through transformers: The impact of pre-training","author":"Tufano","year":"2023"},{"issue":"PLDI","key":"10.1016\/j.infsof.2025.107766_b92","doi-asserted-by":"crossref","DOI":"10.1145\/3591227","article-title":"Discrete adversarial attack to models of code","volume":"7","author":"Gao","year":"2023","journal-title":"Proc. ACM Program. Lang."},{"key":"10.1016\/j.infsof.2025.107766_b93","doi-asserted-by":"crossref","unstructured":"C. Na, Y. Choi, J.-H. Lee, DIP: Dead code Insertion based Black-box Attack for Programming Language Model, in: A. Rogers, J. Boyd-Graber, N. Okazaki (Eds.), ACL, Toronto, Canada, 2023, pp. 7777\u20137791, http:\/\/dx.doi.org\/10.18653\/v1\/2023.acl-long.430, URL https:\/\/aclanthology.org\/2023.acl-long.430.","DOI":"10.18653\/v1\/2023.acl-long.430"},{"issue":"9","key":"10.1016\/j.infsof.2025.107766_b94","doi-asserted-by":"crossref","first-page":"4456","DOI":"10.1109\/TSE.2023.3298609","article-title":"Bian: Smart contract source code obfuscation","volume":"49","author":"Zhang","year":"2023","journal-title":"IEEE Trans. Softw. Eng."}],"container-title":["Information and Software Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950584925001053?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950584925001053?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T09:33:24Z","timestamp":1762335204000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950584925001053"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8]]},"references-count":94,"alternative-id":["S0950584925001053"],"URL":"https:\/\/doi.org\/10.1016\/j.infsof.2025.107766","relation":{},"ISSN":["0950-5849"],"issn-type":[{"value":"0950-5849","type":"print"}],"subject":[],"published":{"date-parts":[[2025,8]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"SolBERT: Advancing solidity smart contract similarity analysis via self-supervised pre-training and contrastive fine-tuning","name":"articletitle","label":"Article Title"},{"value":"Information and Software Technology","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.infsof.2025.107766","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"107766"}}