{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T20:59:23Z","timestamp":1780779563435,"version":"3.54.1"},"reference-count":99,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Journal of Systems and Software"],"published-print":{"date-parts":[[2026,10]]},"DOI":"10.1016\/j.jss.2026.112952","type":"journal-article","created":{"date-parts":[[2026,5,15]],"date-time":"2026-05-15T23:58:06Z","timestamp":1778889486000},"page":"112952","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["A metamorphic testing perspective on knowledge distillation for language models of code: Does the student deeply mimic the teacher?"],"prefix":"10.1016","volume":"240","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-5760-8211","authenticated-orcid":false,"given":"Md. Abdul","family":"Awal","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mrigank","family":"Rochan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chanchal K.","family":"Roy","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.jss.2026.112952_b1","series-title":"2025 IEEE International Conference on Software Maintenance and Evolution","first-page":"1","article-title":"Is quantization a deal-breaker? Empirical insights from large code models","author":"Afrin","year":"2025"},{"key":"10.1016\/j.jss.2026.112952_b2","series-title":"Unified pre-training for program understanding and generation","author":"Ahmad","year":"2021"},{"key":"10.1016\/j.jss.2026.112952_b3","doi-asserted-by":"crossref","unstructured":"Ahmed, T., Pai, K.S., Devanbu, P., Barr, E., 2024. Automatic semantic augmentation of language model prompts (for code summarization). In: Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering. pp. 1\u201313.","DOI":"10.1145\/3597503.3639183"},{"key":"10.1016\/j.jss.2026.112952_b4","series-title":"16th IWSC","first-page":"39","article-title":"Codebert for code clone detection: A replication study","author":"Arshad","year":"2022"},{"key":"10.1016\/j.jss.2026.112952_b5","series-title":"Moekd: Mixture-of-experts knowledge distillation for robust and high-performing compressed code models","author":"Awal","year":"2026"},{"key":"10.1016\/j.jss.2026.112952_b6","article-title":"Do deep nets really need to be deep?","volume":"27","author":"Ba","year":"2014","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.jss.2026.112952_b7","doi-asserted-by":"crossref","unstructured":"Bucilu\u01ce, C., Caruana, R., Niculescu-Mizil, A., 2006. Model compression. In: Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. pp. 535\u2013541.","DOI":"10.1145\/1150402.1150464"},{"key":"10.1016\/j.jss.2026.112952_b8","series-title":"Metamorphic testing: a new approach for generating next test cases","author":"Chen","year":"2020"},{"key":"10.1016\/j.jss.2026.112952_b9","doi-asserted-by":"crossref","unstructured":"Chen, J., Hu, X., Li, Z., Gao, C., Xia, X., Lo, D., 2024. Code search is all you need? improving code suggestions with code search. In: Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering. pp. 1\u201313.","DOI":"10.1145\/3597503.3639085"},{"issue":"1","key":"10.1016\/j.jss.2026.112952_b10","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3143561","article-title":"Metamorphic testing: A review of challenges and opportunities","volume":"51","author":"Chen","year":"2018","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.jss.2026.112952_b11","series-title":"Evaluating large language models trained on code","author":"Chen","year":"2021"},{"key":"10.1016\/j.jss.2026.112952_b12","series-title":"International Conference on Machine Learning","first-page":"2285","article-title":"Compressing neural networks with the hashing trick","author":"Chen","year":"2015"},{"issue":"FSE","key":"10.1016\/j.jss.2026.112952_b13","doi-asserted-by":"crossref","first-page":"3057","DOI":"10.1145\/3729405","article-title":"Smaller but better: Self-paced knowledge distillation for lightweight yet effective LCMs","volume":"2","author":"Chen","year":"2025","journal-title":"Proc. the ACM Softw. Eng."},{"key":"10.1016\/j.jss.2026.112952_b14","series-title":"2025 IEEE International Conference on Software Analysis, Evolution and Reengineering","first-page":"12","article-title":"On the compression of language models for code: An empirical study on codebert","author":"d\u2019Aloisio","year":"2024"},{"key":"10.1016\/j.jss.2026.112952_b15","series-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"10.1016\/j.jss.2026.112952_b16","series-title":"2025 IEEE\/ACM 47th International Conference on Software Engineering","first-page":"1729","article-title":"Vulnerability detection with code language models: How far are we?","author":"Ding","year":"2024"},{"key":"10.1016\/j.jss.2026.112952_b17","doi-asserted-by":"crossref","unstructured":"Du, M., Mukherjee, S., Cheng, Y., Shokouhi, M., Hu, X., Awadallah, A.H., 2021. Robustness challenges in model distillation and pruning for natural language understanding. In: Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics. pp. 1766\u20131778.","DOI":"10.18653\/v1\/2023.eacl-main.129"},{"key":"10.1016\/j.jss.2026.112952_b18","doi-asserted-by":"crossref","unstructured":"Du, X., Wen, M., Wei, Z., Wang, S., Jin, H., 2023. An Extensive Study on Adversarial Attack against Pre-trained Models of Code. In: 31st FSE. pp. 489\u2013501.","DOI":"10.1145\/3611643.3616356"},{"issue":"55","key":"10.1016\/j.jss.2026.112952_b19","first-page":"1","article-title":"Neural architecture search: A survey","volume":"20","author":"Elsken","year":"2019","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.jss.2026.112952_b20","doi-asserted-by":"crossref","unstructured":"Feng, Z., Guo, D., Tang, D., Duan, N., Feng, X., Gong, M., Shou, L., Qin, B., Liu, T., Jiang, D., et al., 2020. Codebert: A pre-trained model for programming and natural languages. In: Findings of the Association for Computational Linguistics: EMNLP 2020. pp. 1536\u20131547.","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"issue":"200","key":"10.1016\/j.jss.2026.112952_b21","doi-asserted-by":"crossref","first-page":"675","DOI":"10.1080\/01621459.1937.10503522","article-title":"The use of ranks to avoid the assumption of normality implicit in the analysis of variance","volume":"32","author":"Friedman","year":"1937","journal-title":"J. Amer. Statist. Assoc."},{"key":"10.1016\/j.jss.2026.112952_b22","first-page":"3996","article-title":"Adversarially robust distillation","volume":"vol. 34","author":"Goldblum","year":"2020"},{"key":"10.1016\/j.jss.2026.112952_b23","doi-asserted-by":"crossref","unstructured":"Gordon, M.A., Duh, K., Andrews, N., 2020. Compressing bert: Studying the effects of weight pruning on transfer learning. In: Proceedings of the 5th Workshop on Representation Learning for NLP. pp. 143\u2013155.","DOI":"10.18653\/v1\/2020.repl4nlp-1.18"},{"key":"10.1016\/j.jss.2026.112952_b24","series-title":"2024 IEEE 10th International Conference on Edge Computing and Scalable Cloud (EdgeCom)","first-page":"13","article-title":"Improving robustness of compressed models with weight sharing through knowledge distillation","author":"Gourtani","year":"2024"},{"key":"10.1016\/j.jss.2026.112952_b25","series-title":"International Conference on Machine Learning","first-page":"1321","article-title":"On calibration of modern neural networks","author":"Guo","year":"2017"},{"key":"10.1016\/j.jss.2026.112952_b26","series-title":"Graphcodebert: Pre-training code representations with data flow","author":"Guo","year":"2020"},{"key":"10.1016\/j.jss.2026.112952_b27","series-title":"2019 IEEE\/ACM 41st International Conference on Software Engineering","first-page":"960","article-title":"When code completion fails: A case study on real-world completions","author":"Hellendoorn","year":"2019"},{"key":"10.1016\/j.jss.2026.112952_b28","series-title":"A baseline for detecting misclassified and out-of-distribution examples in neural networks","author":"Hendrycks","year":"2016"},{"key":"10.1016\/j.jss.2026.112952_b29","series-title":"Distilling the knowledge in a neural network","author":"Hinton","year":"2015"},{"issue":"8","key":"10.1016\/j.jss.2026.112952_b30","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3695988","article-title":"Large language models for software engineering: A systematic literature review","volume":"33","author":"Hou","year":"2024","journal-title":"ACM Trans. Softw. Eng. Methodol."},{"key":"10.1016\/j.jss.2026.112952_b31","series-title":"Codesearchnet challenge: Evaluating the state of semantic code search","author":"Husain","year":"2019"},{"key":"10.1016\/j.jss.2026.112952_b32","series-title":"Grammarly","author":"Inc.","year":"2025"},{"issue":"1","key":"10.1016\/j.jss.2026.112952_b33","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1162\/neco.1991.3.1.79","article-title":"Adaptive mixtures of local experts","volume":"3","author":"Jacobs","year":"1991","journal-title":"Neural Comput."},{"key":"10.1016\/j.jss.2026.112952_b34","series-title":"Beware of calibration data for pruning large language models","author":"Ji","year":"2024"},{"issue":"5","key":"10.1016\/j.jss.2026.112952_b35","doi-asserted-by":"crossref","first-page":"649","DOI":"10.1109\/TSE.2010.62","article-title":"An analysis and survey of the development of mutation testing","volume":"37","author":"Jia","year":"2010","journal-title":"IEEE Trans. Softw. Eng."},{"key":"10.1016\/j.jss.2026.112952_b36","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Chan, C., Chen, M., Wang, W., 2023. Lion: Adversarial distillation of proprietary large language models. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing. pp. 3134\u20133154.","DOI":"10.18653\/v1\/2023.emnlp-main.189"},{"key":"10.1016\/j.jss.2026.112952_b37","doi-asserted-by":"crossref","unstructured":"Jiao, X., Yin, Y., Shang, L., Jiang, X., Chen, X., Li, L., Wang, F., Liu, Q., 2020. Tinybert: Distilling bert for natural language understanding. In: Findings of the Association for Computational Linguistics: EMNLP 2020. pp. 4163\u20134174.","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"issue":"1","key":"10.1016\/j.jss.2026.112952_b38","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1214\/aoms\/1177729694","article-title":"On information and sufficiency","volume":"22","author":"Kullback","year":"1951","journal-title":"Ann. Math. Stat."},{"key":"10.1016\/j.jss.2026.112952_b39","doi-asserted-by":"crossref","unstructured":"Kwan, W.-C., Zeng, X., Jiang, Y., Wang, Y., Li, L., Shang, L., Jiang, X., Liu, Q., Wong, K.-F., 2024. Mt-eval: A multi-turn capabilities evaluation benchmark for large language models. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing. pp. 20153\u201320177.","DOI":"10.18653\/v1\/2024.emnlp-main.1124"},{"issue":"7553","key":"10.1016\/j.jss.2026.112952_b40","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"LeCun","year":"2015","journal-title":"Nature"},{"key":"10.1016\/j.jss.2026.112952_b41","series-title":"Roberta: A robustly optimized bert pretraining approach","author":"Liu","year":"2019"},{"issue":"1","key":"10.1016\/j.jss.2026.112952_b42","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10515-025-00560-2","article-title":"PIONEER: improving the robustness of student models when compressing pre-trained models of code","volume":"33","author":"Liu","year":"2026","journal-title":"Autom. Softw. Eng."},{"key":"10.1016\/j.jss.2026.112952_b43","series-title":"Starcoder 2 and the stack v2: The next generation","author":"Lozhkov","year":"2024"},{"key":"10.1016\/j.jss.2026.112952_b44","series-title":"Codexglue: A machine learning benchmark dataset for code understanding and generation","author":"Lu","year":"2021"},{"key":"10.1016\/j.jss.2026.112952_b45","article-title":"Obtaining well calibrated probabilities using bayesian binning","volume":"vol. 29","author":"Naeini","year":"2015"},{"key":"10.1016\/j.jss.2026.112952_b46","series-title":"ChatGPT","author":"OpenAI","year":"2025"},{"key":"10.1016\/j.jss.2026.112952_b47","series-title":"2025 IEEE\/ACM 47th International Conference on Software Engineering","first-page":"766","article-title":"Metamorphic-based many-objective distillation of LLMs for code-related tasks","author":"Panichella","year":"2025"},{"key":"10.1016\/j.jss.2026.112952_b48","doi-asserted-by":"crossref","unstructured":"Park, W., Kim, D., Lu, Y., Cho, M., 2019. Relational knowledge distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 3967\u20133976.","DOI":"10.1109\/CVPR.2019.00409"},{"key":"10.1016\/j.jss.2026.112952_b49","series-title":"Proceedings of the ACM on Foundation of Software Engineering","first-page":"12","article-title":"An adaptive language-agnostic pruning method for greener language models for code","author":"Saad","year":"2025"},{"key":"10.1016\/j.jss.2026.112952_b50","series-title":"2013 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"6655","article-title":"Low-rank matrix factorization for deep neural network training with high-dimensional output targets","author":"Sainath","year":"2013"},{"key":"10.1016\/j.jss.2026.112952_b51","series-title":"Distilbert, a distilled version of BERT: smaller, faster, cheaper and lighter","author":"Sanh","year":"2019"},{"key":"10.1016\/j.jss.2026.112952_b52","first-page":"20378","article-title":"Movement pruning: Adaptive sparsity by fine-tuning","volume":"33","author":"Sanh","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"12","key":"10.1016\/j.jss.2026.112952_b53","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1145\/3381831","article-title":"Green ai","volume":"63","author":"Schwartz","year":"2020","journal-title":"Commun. ACM"},{"key":"10.1016\/j.jss.2026.112952_b54","doi-asserted-by":"crossref","unstructured":"Segura, S., Parejo, J.A., Troya, J., Ruiz-Cort\u00e9s, A., 2018. Metamorphic testing of RESTful web APIs. In: Proceedings of the 40th International Conference on Software Engineering. pp. 882\u2013882.","DOI":"10.1145\/3180155.3182528"},{"key":"10.1016\/j.jss.2026.112952_b55","doi-asserted-by":"crossref","unstructured":"Shang, Y., Liu, G., Kompella, R.R., Yan, Y., 2024. Enhancing post-training quantization calibration through contrastive learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 15921\u201315930.","DOI":"10.1109\/CVPR52733.2024.01507"},{"key":"10.1016\/j.jss.2026.112952_b56","series-title":"Proceedings of the 46th International Conference on Software Engineering: Software Engineering in Society","first-page":"142","article-title":"Greening large language models of code","author":"Shi","year":"2024"},{"key":"10.1016\/j.jss.2026.112952_b57","series-title":"ACM Transactions on Software Engineering and Methodology","article-title":"Efficient and green large language models for software engineering: Vision and the road ahead","author":"Shi","year":"2024"},{"key":"10.1016\/j.jss.2026.112952_b58","series-title":"Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering","article-title":"Compressing pre-trained models of code into 3 MB","author":"Shi","year":"2023"},{"issue":"6","key":"10.1016\/j.jss.2026.112952_b59","first-page":"1","article-title":"Fairness testing of machine translation systems","volume":"33","author":"Sun","year":"2024","journal-title":"ACM Trans. Softw. Eng. Methodol."},{"key":"10.1016\/j.jss.2026.112952_b60","doi-asserted-by":"crossref","unstructured":"Sun, S., Cheng, Y., Gan, Z., Liu, J., 2019. Patient knowledge distillation for bert model compression. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). pp. 4323\u20134332.","DOI":"10.18653\/v1\/D19-1441"},{"key":"10.1016\/j.jss.2026.112952_b61","doi-asserted-by":"crossref","unstructured":"Sun, Z., Yu, H., Song, X., Liu, R., Yang, Y., Zhou, D., 2020. Mobilebert: a compact task-agnostic bert for resource-limited devices. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. pp. 2158\u20132170.","DOI":"10.18653\/v1\/2020.acl-main.195"},{"key":"10.1016\/j.jss.2026.112952_b62","series-title":"2014 IEEE International Conference on Software Maintenance and Evolution","first-page":"476","article-title":"Towards a big data curated benchmark of inter-project code clones","author":"Svajlenko","year":"2014"},{"key":"10.1016\/j.jss.2026.112952_b63","series-title":"Intriguing properties of neural networks","author":"Szegedy","year":"2013"},{"key":"10.1016\/j.jss.2026.112952_b64","series-title":"Distilling task-specific knowledge from bert into simple neural networks","author":"Tang","year":"2019"},{"key":"10.1016\/j.jss.2026.112952_b65","series-title":"2023 38th IEEE\/ACM International Conference on Automated Software Engineering","first-page":"850","article-title":"Code difference guided adversarial example generation for deep code models","author":"Tian","year":"2023"},{"key":"10.1016\/j.jss.2026.112952_b66","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.jss.2026.112952_b67","doi-asserted-by":"crossref","unstructured":"Wang, W., Bao, H., Huang, S., Dong, L., Wei, F., 2021. Minilmv2: Multi-head self-attention relation distillation for compressing pretrained transformers. In: Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021. pp. 2140\u20132151.","DOI":"10.18653\/v1\/2021.findings-acl.188"},{"key":"10.1016\/j.jss.2026.112952_b68","series-title":"2020 IEEE 27th International Conference on Software Analysis, Evolution and Reengineering","first-page":"261","article-title":"Detecting code clones with graph neural network and flow-augmented abstract syntax tree","author":"Wang","year":"2020"},{"key":"10.1016\/j.jss.2026.112952_b69","doi-asserted-by":"crossref","unstructured":"Wang, Y., Wang, W., Joty, S., Hoi, S.C., 2021. Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing. pp. 8696\u20138708.","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"10.1016\/j.jss.2026.112952_b70","first-page":"5776","article-title":"Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers","volume":"33","author":"Wang","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.jss.2026.112952_b71","series-title":"An empirical study of knowledge distillation for code understanding tasks","author":"Wang","year":"2025"},{"issue":"6","key":"10.1016\/j.jss.2026.112952_b72","doi-asserted-by":"crossref","first-page":"3048","DOI":"10.1109\/TPAMI.2021.3055564","article-title":"Knowledge distillation and student-teacher learning for visual intelligence: A review and new outlooks","volume":"44","author":"Wang","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.jss.2026.112952_b73","doi-asserted-by":"crossref","unstructured":"Wei, X., Gonugondla, S.K., Wang, S., Ahmad, W., Ray, B., Qian, H., Li, X., Kumar, V., Wang, Z., Tian, Y., et al., 2023. Towards greener yet powerful code generation via quantization: An empirical study. In: Proceedings of the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering. pp. 224\u2013236.","DOI":"10.1145\/3611643.3616302"},{"key":"10.1016\/j.jss.2026.112952_b74","series-title":"PROCEEDINGS the Eighth International Symposium on Software Reliability Engineering","first-page":"264","article-title":"A study of effective regression testing in practice","author":"Wong","year":"1997"},{"key":"10.1016\/j.jss.2026.112952_b75","first-page":"1","article-title":"Wilcoxon signed-rank test","author":"Woolson","year":"2007","journal-title":"Wiley Encycl. Clin. Trials"},{"issue":"FSE","key":"10.1016\/j.jss.2026.112952_b76","doi-asserted-by":"crossref","first-page":"1432","DOI":"10.1145\/3715784","article-title":"Detecting and reducing the factual hallucinations of large language models with metamorphic testing","volume":"2","author":"Wu","year":"2025","journal-title":"Proc. the ACM Softw. Eng."},{"issue":"1","key":"10.1016\/j.jss.2026.112952_b77","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3508035","article-title":"Metamorphic testing of deep learning compilers","volume":"6","author":"Xiao","year":"2022","journal-title":"Proc. the ACM Meas. Anal. Comput. Syst."},{"issue":"4","key":"10.1016\/j.jss.2026.112952_b78","doi-asserted-by":"crossref","first-page":"544","DOI":"10.1016\/j.jss.2010.11.920","article-title":"Testing and validating machine learning classifiers by metamorphic testing","volume":"84","author":"Xie","year":"2011","journal-title":"J. Syst. Softw."},{"issue":"4","key":"10.1016\/j.jss.2026.112952_b79","doi-asserted-by":"crossref","first-page":"1293","DOI":"10.1109\/TR.2020.2972266","article-title":"METTLE: A metamorphic testing approach to assessing and validating unsupervised machine learning systems","volume":"69","author":"Xie","year":"2020","journal-title":"IEEE Trans. Reliab."},{"issue":"5","key":"10.1016\/j.jss.2026.112952_b80","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3706418","article-title":"Resource-efficient algorithms and systems of foundation models: A survey","volume":"57","author":"Xu","year":"2025","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.jss.2026.112952_b81","first-page":"10566","article-title":"A survey on model compression and acceleration for pretrained language models","volume":"vol. 37","author":"Xu","year":"2023"},{"key":"10.1016\/j.jss.2026.112952_b82","series-title":"A survey on green deep learning","author":"Xu","year":"2021"},{"key":"10.1016\/j.jss.2026.112952_b83","doi-asserted-by":"crossref","unstructured":"Xu, C., Zhou, W., Ge, T., Wei, F., Zhou, M., 2020. Bert-of-theseus: Compressing bert by progressive module replacing. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP). pp. 7859\u20137869.","DOI":"10.18653\/v1\/2020.emnlp-main.633"},{"key":"10.1016\/j.jss.2026.112952_b84","doi-asserted-by":"crossref","unstructured":"Xu, C., Zhou, W., Ge, T., Xu, K., McAuley, J., Wei, F., 2021. Beyond preserved accuracy: Evaluating loyalty and robustness of BERT compression. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing. pp. 10653\u201310659.","DOI":"10.18653\/v1\/2021.emnlp-main.832"},{"key":"10.1016\/j.jss.2026.112952_b85","doi-asserted-by":"crossref","first-page":"425","DOI":"10.1145\/3715735","article-title":"Hallucination detection in large language models with metamorphic relations","volume":"2","author":"Yang","year":"2025","journal-title":"Proc. ACM Softw. Eng."},{"key":"10.1016\/j.jss.2026.112952_b86","doi-asserted-by":"crossref","unstructured":"Yang, Z., Shi, J., He, J., Lo, D., 2022. Natural attack for pre-trained models of code. In: Proceedings of the 44th ICSE. pp. 1482\u20131493.","DOI":"10.1145\/3510003.3510146"},{"issue":"FSE","key":"10.1016\/j.jss.2026.112952_b87","doi-asserted-by":"crossref","first-page":"446","DOI":"10.1145\/3715736","article-title":"One-for-all does not work! enhancing vulnerability detection by mixture-of-experts (MoE)","volume":"2","author":"Yang","year":"2025","journal-title":"Proc. the ACM Softw. Eng."},{"key":"10.1016\/j.jss.2026.112952_b88","doi-asserted-by":"crossref","unstructured":"Ye, S., Xu, K., Liu, S., Cheng, H., Lambrechts, J.-H., Zhang, H., Zhou, A., Ma, K., Wang, Y., Lin, X., 2019. Adversarial robustness vs. model compression, or both?. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 111\u2013120.","DOI":"10.1109\/ICCV.2019.00020"},{"key":"10.1016\/j.jss.2026.112952_b89","series-title":"2019 Fifth Workshop on Energy Efficient Machine Learning and Cognitive Computing-NeurIPS Edition (EMC2-NIPS)","first-page":"36","article-title":"Q8bert: Quantized 8bit bert","author":"Zafrir","year":"2019"},{"key":"10.1016\/j.jss.2026.112952_b90","doi-asserted-by":"crossref","unstructured":"Zeng, Z., Tan, H., Zhang, H., Li, J., Zhang, Y., Zhang, L., 2022. An extensive study on pre-trained models for program understanding and generation. In: 31st ACM SIGSOFT ISSTA. pp. 39\u201351.","DOI":"10.1145\/3533767.3534390"},{"key":"10.1016\/j.jss.2026.112952_b91","doi-asserted-by":"crossref","unstructured":"Zhang, J., Chen, J., Hao, D., Xiong, Y., Xie, B., Zhang, L., Mei, H., 2014. Search-based inference of polynomial metamorphic relations. In: Proceedings of the 29th ACM\/IEEE International Conference on Automated Software Engineering. pp. 701\u2013712.","DOI":"10.1145\/2642937.2642994"},{"issue":"3","key":"10.1016\/j.jss.2026.112952_b92","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3511887","article-title":"Towards robustness of deep program processing models\u2014detection, estimation, and enhancement","volume":"31","author":"Zhang","year":"2022","journal-title":"ACM Trans. Softw. Eng. Methodol. (TOSEM)"},{"issue":"1","key":"10.1016\/j.jss.2026.112952_b93","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TSE.2019.2962027","article-title":"Machine learning testing: Survey, landscapes and horizons","volume":"48","author":"Zhang","year":"2020","journal-title":"IEEE Trans. Softw. Eng."},{"key":"10.1016\/j.jss.2026.112952_b94","first-page":"1169","article-title":"Generating adversarial examples for holding robustness of source code processing models","volume":"vol. 34","author":"Zhang","year":"2020"},{"key":"10.1016\/j.jss.2026.112952_b95","series-title":"Structadmm: A systematic, high-efficiency framework of structured weight pruning for DNNs","author":"Zhang","year":"2018"},{"key":"10.1016\/j.jss.2026.112952_b96","article-title":"Devign: Effective vulnerability identification by learning comprehensive program semantics via graph neural networks","volume":"32","author":"Zhou","year":"2019","journal-title":"Adv. NIPS"},{"key":"10.1016\/j.jss.2026.112952_b97","doi-asserted-by":"crossref","first-page":"1556","DOI":"10.1162\/tacl_a_00704","article-title":"A survey on model compression for large language models","volume":"12","author":"Zhu","year":"2024","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"10.1016\/j.jss.2026.112952_b98","doi-asserted-by":"crossref","unstructured":"Zhu, J., Wang, L., Han, X., 2022. Safety and performance, why not both? bi-objective optimized model compression toward ai software deployment. In: Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering. pp. 1\u201313.","DOI":"10.1145\/3551349.3556906"},{"key":"10.1016\/j.jss.2026.112952_b99","doi-asserted-by":"crossref","unstructured":"Zi, B., Zhao, S., Ma, X., Jiang, Y.-G., 2021. Revisiting adversarial robustness distillation: Robust soft labels make student better. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 16443\u201316452.","DOI":"10.1109\/ICCV48922.2021.01613"}],"container-title":["Journal of Systems and Software"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0164121226001858?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0164121226001858?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T20:24:01Z","timestamp":1780777441000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0164121226001858"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,10]]},"references-count":99,"alternative-id":["S0164121226001858"],"URL":"https:\/\/doi.org\/10.1016\/j.jss.2026.112952","relation":{},"ISSN":["0164-1212"],"issn-type":[{"value":"0164-1212","type":"print"}],"subject":[],"published":{"date-parts":[[2026,10]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A metamorphic testing perspective on knowledge distillation for language models of code: Does the student deeply mimic the teacher?","name":"articletitle","label":"Article Title"},{"value":"Journal of Systems and Software","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.jss.2026.112952","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Inc. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"112952"}}