{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T14:56:08Z","timestamp":1779893768003,"version":"3.53.1"},"reference-count":127,"publisher":"Association for Computing Machinery (ACM)","issue":"4","license":[{"start":{"date-parts":[[2024,4,20]],"date-time":"2024-04-20T00:00:00Z","timestamp":1713571200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Luxembourg National Research Funds","award":["C18\/IS\/12669767\/ STELLAR\/LeTraon"],"award-info":[{"award-number":["C18\/IS\/12669767\/ STELLAR\/LeTraon"]}]},{"name":"European Union\u2019s Horizon Research and Innovation Programme","award":["101070303"],"award-info":[{"award-number":["101070303"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":["ACM Trans. Softw. Eng. Methodol."],"published-print":{"date-parts":[[2024,5,31]]},"abstract":"<jats:p>This article presents a comprehensive survey on test optimization in deep neural network\u00a0(DNN) testing. Here, test optimization refers to testing with low data labeling effort. We analyzed 90 papers, including 43 from the software engineering (SE) community, 32 from the machine learning (ML) community, and 15 from other communities. Our study: (i) unifies the problems as well as terminologies associated with low-labeling cost testing, (ii) compares the distinct focal points of SE and ML communities, and (iii) reveals the pitfalls in existing literature. Furthermore, we highlight the research opportunities in this domain.<\/jats:p>","DOI":"10.1145\/3643678","type":"journal-article","created":{"date-parts":[[2024,1,27]],"date-time":"2024-01-27T12:51:11Z","timestamp":1706359871000},"page":"1-42","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":22,"title":["Test Optimization in DNN Testing: A Survey"],"prefix":"10.1145","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8251-1669","authenticated-orcid":false,"given":"Qiang","family":"Hu","sequence":"first","affiliation":[{"name":"University of Luxembourg, Esch-sur-Alzette, Luxembourg"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5535-2420","authenticated-orcid":false,"given":"Yuejun","family":"Guo","sequence":"additional","affiliation":[{"name":"Luxembourg Institute of Science and Technology, Esch-sur-Alzette, Luxembourg"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1288-6502","authenticated-orcid":false,"given":"Xiaofei","family":"Xie","sequence":"additional","affiliation":[{"name":"Singapore Management University, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8312-1358","authenticated-orcid":false,"given":"Maxime","family":"Cordy","sequence":"additional","affiliation":[{"name":"University of Luxembourg, Luxembourg, Luxembourg"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8621-2420","authenticated-orcid":false,"given":"Lei","family":"Ma","sequence":"additional","affiliation":[{"name":"The University of Tokyo, Edmonton, Japan and University of Albert, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1852-2547","authenticated-orcid":false,"given":"Mike","family":"Papadakis","sequence":"additional","affiliation":[{"name":"University of Luxembourg, Esch-sur-Alzette, Luxembourg"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1045-4861","authenticated-orcid":false,"given":"Yves","family":"Le Traon","sequence":"additional","affiliation":[{"name":"University of Luxembourg, Esch-sur-Alzette, Luxembourg"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,4,20]]},"reference":[{"key":"e_1_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2023.3243522"},{"key":"e_1_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Zohreh Aghababaeyan Manel Abdellatif Mahboubeh Dadkhah and Lionel Briand. 2023. DeepGD: a multi-objective black-box test selection approach for deep neural networks. arxiv:2303.04878Retrieved from https:\/\/arxiv.org\/pdf\/2303.04878","DOI":"10.1145\/3644388"},{"key":"e_1_3_2_4_2","unstructured":"Jonathan Aigrain and Marcin Detyniecki. 2019. Detecting adversarial examples and other misclassifications in neural networks by introspection. arxiv:1905.09186Retrieved from https:\/\/arxiv.org\/pdf\/1905.09186"},{"key":"e_1_3_2_5_2","first-page":"64","volume-title":"Proceedings of the IEEE International Conference On Artificial Intelligence Testing","author":"Al-Qadasi Hamzah","year":"2022","unstructured":"Hamzah Al-Qadasi, Changshun Wu, Yli\u00e8s Falcone, and Saddek Bensalem. 2022. DeepAbstraction: 2-level prioritization for unlabeled test inputs in deep neural networks. In Proceedings of the IEEE International Conference On Artificial Intelligence Testing. IEEE, Piscataway, NJ, USA, 64\u201371. DOI:10.1109\/AITest55621.2022.00018"},{"key":"e_1_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3550271"},{"key":"e_1_3_2_7_2","first-page":"19274","article-title":"Agreement-on-the-line: predicting the performance of neural networks under distribution shift","volume":"35","author":"Baek Christina","year":"2022","unstructured":"Christina Baek, Yiding Jiang, Aditi Raghunathan, and J Zico Kolter. 2022. Agreement-on-the-line: predicting the performance of neural networks under distribution shift. Advances in Neural Information Processing Systems 35 (2022), 19274\u201319289.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_8_2","first-page":"501","volume-title":"Proceedings of the 32nd ACM SIGSOFT International Symposium on Software Testing and Analysis","author":"Bao Shenglin","year":"2023","unstructured":"Shenglin Bao, Chaofeng Sha, Bihuan Chen, Xin Peng, and Wenyun Zhao. 2023. In defense of simple techniques for neural network test case selection. In Proceedings of the 32nd ACM SIGSOFT International Symposium on Software Testing and Analysis. Association for Computing Machinery, New York, NY, USA, 501\u2013513. DOI:10.1145\/3597926.3598073"},{"key":"e_1_3_2_9_2","first-page":"63","volume-title":"Proceedings of the 2019 IEEE International Conference On Artificial Intelligence Testing","author":"Byun Taejoon","year":"2019","unstructured":"Taejoon Byun, Vaibhav Sharma, Abhishek Vijayakumar, Sanjai Rayadurgam, and Darren Cofer. 2019. Input prioritization for testing neural networks. In Proceedings of the 2019 IEEE International Conference On Artificial Intelligence Testing. IEEE, 63\u201370. DOI:10.1109\/AITest.2019.000-6"},{"key":"e_1_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1007\/s10515-023-00396-8"},{"key":"e_1_3_2_11_2","first-page":"14980","article-title":"Detecting errors and estimating accuracy on unlabeled data with self-training ensembles","volume":"34","author":"Chen Jiefeng","year":"2021","unstructured":"Jiefeng Chen, Frederick Liu, Besim Avci, Xi Wu, Yingyu Liang, and Somesh Jha. 2021. Detecting errors and estimating accuracy on unlabeled data with self-training ensembles. In Advances in Neural Information Processing Systems 34 (2021), 14980\u201314992.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3582573"},{"key":"e_1_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3394112"},{"key":"e_1_3_2_14_2","first-page":"11467","article-title":"Estimating and explaining model performance when both covariates and labels shift","volume":"35","author":"Chen Lingjiao","year":"2022","unstructured":"Lingjiao Chen, Matei Zaharia, and James Y. Zou. 2022. Estimating and explaining model performance when both covariates and labels shift. Advances in Neural Information Processing Systems 35 (2022), 11467\u201311479.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_15_2","first-page":"1984","volume-title":"Proceedings of the 37th International conference on machine learning","author":"Chuang Ching-Yao","year":"2020","unstructured":"Ching-Yao Chuang, Antonio Torralba, and Stefanie Jegelka. 2020. Estimating generalization under distribution shifts via domain-invariant representations. In Proceedings of the 37th International conference on machine learning (Virtual). PMLR, Brookline, MA, USA, 1984\u20131994. Retrieved from https:\/\/proceedings.mlr.press\/v119\/chuang20a\/chuang20a.pdf"},{"key":"e_1_3_2_16_2","doi-asserted-by":"crossref","first-page":"716","DOI":"10.1145\/3468264.3468614","volume-title":"Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"Cito J\u00fcrgen","year":"2021","unstructured":"J\u00fcrgen Cito, Isil Dillig, Seohyun Kim, Vijayaraghavan Murali, and Satish Chandra. 2021. Explaining mispredictions of machine learning models using rule induction. In Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering. Association for Computing Machinery, New York, NY, USA, 716\u2013727. DOI:10.1145\/3468264.3468614"},{"key":"e_1_3_2_17_2","first-page":"2674","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Corneanu Ciprian A.","year":"2020","unstructured":"Ciprian A. Corneanu, Sergio Escalera, and Aleix M. Martinez. 2020. Computing the testing error without a testing set. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (Virtual). IEEE Computer Society, Los Alamitos, CA, USA, 2674\u20132682. DOI:10.1109\/CVPR42600.2020.00275"},{"key":"e_1_3_2_18_2","first-page":"3940","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Cui Shuhao","year":"2020","unstructured":"Shuhao Cui, Shuhui Wang, Junbao Zhuo, Liang Li, Qingming Huang, and Qi Tian. 2020. Towards discriminability and diversity: batch nuclear-norm maximization under label insufficient situations. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. IEEE Computer Society, Los Alamitos, CA, USA, 3940\u20133949."},{"key":"e_1_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3607191"},{"key":"e_1_3_2_20_2","first-page":"1","volume-title":"Proceedings of the ICML 2019 Workshop on Identifying and Understanding Deep Learning Phenomena","author":"DeChant Chad","year":"2019","unstructured":"Chad DeChant, Seungwook Han, and Hod Lipson. 2019. Predicting the accuracy of neural networks from final and intermediate layer outputs. In Proceedings of the ICML 2019 Workshop on Identifying and Understanding Deep Learning Phenomena. OpenReview.net, Online, 1\u20136. Retrieved from https:\/\/openreview.net\/pdf?id=H1xXwEB2h4"},{"key":"e_1_3_2_21_2","first-page":"2579","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Deng Weijian","year":"2021","unstructured":"Weijian Deng, Stephen Gould, and Liang Zheng. 2021. What does rotation prediction tell us about classifier accuracy under varying testing environments?. In Proceedings of the International Conference on Machine Learning (Virtual). PMLR, Brookline, MA, USA, 2579\u20132589. Retrieved from https:\/\/proceedings.mlr.press\/v139\/deng21a\/deng21a.pdf"},{"key":"e_1_3_2_22_2","unstructured":"Weijian Deng Yumin Suh Stephen Gould and Liang Zheng. 2023. Confidence and dispersity speak: characterising prediction matrix for unsupervised accuracy estimation. arxiv:2302.01094Retrieved from https:\/\/arxiv.org\/pdf\/2302.01094"},{"key":"e_1_3_2_23_2","doi-asserted-by":"publisher","unstructured":"Weijian Deng and Liang Zheng. 2021. Are labels always necessary for classifier accuracy evaluation? In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. IEEE Nashville TN USA 15064\u201315073. DOI:10.1109\/CVPR46437.2021.01482","DOI":"10.1109\/CVPR46437.2021.01482"},{"key":"e_1_3_2_24_2","first-page":"15064","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Deng Weijian","year":"2021","unstructured":"Weijian Deng and Liang Zheng. 2021. Are labels always necessary for classifier accuracy evaluation?. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. IEEE, Nashville, TN, USA, 15064\u201315073. DOI:10.1109\/CVPR46437.2021.01482"},{"key":"e_1_3_2_25_2","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1145\/3540250.3549152","volume-title":"Proceedings of the 30th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"Deng Yao","year":"2022","unstructured":"Yao Deng, Xi Zheng, Mengshi Zhang, Guannan Lou, and Tianyi Zhang. 2022. Scenario-based test reduction and prioritization for multi-module autonomous driving systems. In Proceedings of the 30th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering. Association for Computing Machinery, New York, NY, USA, 82\u201393. DOI:10.1145\/3540250.3549152"},{"key":"e_1_3_2_26_2","first-page":"2163","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing","author":"Elsahar Hady","year":"2019","unstructured":"Hady Elsahar and Matthias Gall\u00e9. 2019. To annotate or not? predicting performance drop under domain shift. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing. Association for Computational Linguistics, Hong Kong, China, 2163\u20132173. DOI:10.18653\/v1\/D19-1222"},{"key":"e_1_3_2_27_2","doi-asserted-by":"crossref","first-page":"177","DOI":"10.1145\/3395363.3397357","volume-title":"Proceedings of the 29th ACM SIGSOFT International Symposium on Software Testing and Analysis","author":"Feng Yang","year":"2020","unstructured":"Yang Feng, Qingkai Shi, Xinyu Gao, Jun Wan, Chunrong Fang, and Zhenyu Chen. 2020. Deepgini: prioritizing massive tests to enhance the robustness of deep neural networks. In Proceedings of the 29th ACM SIGSOFT International Symposium on Software Testing and Analysis. Association for Computing Machinery, New York, NY, USA, 177\u2013188. DOI:10.1145\/3395363.3397357"},{"key":"e_1_3_2_28_2","unstructured":"Agency for Healthcare Research & Quality. 2017. MEPS HC-181: 2015 full year consolidated data file."},{"key":"e_1_3_2_29_2","first-page":"73","volume-title":"Proceedings of the 44th International Conference on Software Engineering","author":"Gao Xinyu","year":"2022","unstructured":"Xinyu Gao, Yang Feng, Yining Yin, Zixi Liu, Zhenyu Chen, and Baowen Xu. 2022. Adaptive test selection for deep neural networks. In Proceedings of the 44th International Conference on Software Engineering. Association for Computing Machinery, New York, NY, USA, 73\u201385. DOI:10.1145\/3510003.3510232"},{"key":"e_1_3_2_30_2","first-page":"1","volume-title":"Proceedings of the NeurIPS 2021 Workshop on Distribution Shifts: Connecting Methods and Applications","author":"Garg Saurabh","year":"2021","unstructured":"Saurabh Garg, Sivaraman Balakrishnan, Zachary Chase Lipton, Behnam Neyshabur, and Hanie Sedghi. 2021. Leveraging unlabeled data to predict out-of-distribution performance. In Proceedings of the NeurIPS 2021 Workshop on Distribution Shifts: Connecting Methods and Applications (Virtual). OpenReview.net, Online, 1\u201330."},{"key":"e_1_3_2_31_2","doi-asserted-by":"crossref","first-page":"1089","DOI":"10.1145\/3368089.3409739","volume-title":"Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"Ghamizi Salah","year":"2020","unstructured":"Salah Ghamizi, Maxime Cordy, Martin Gubri, Mike Papadakis, Andrey Boystov, Yves Le Traon, and Anne Goujon. 2020. Search-based adversarial testing and improvement of constrained credit scoring systems. In Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering. Association for Computing Machinery, New York, NY, USA, 1089\u20131100. DOI:10.1145\/3368089.3409739"},{"key":"e_1_3_2_32_2","unstructured":"GitHub OpenAI. 2022. Project site of GitHub Copilot. Retrieved from https:\/\/github.com\/features\/copilotAccessed on January 23rd 2024."},{"key":"e_1_3_2_33_2","unstructured":"Federica Granese Marco Romanelli Daniele Gorla Catuscia Palamidessi and Pablo Piantanida. 2021. Doctor: a simple method for detecting misclassification errors. In Advances in Neural Information Processing Systems (NeurIPS\u201921). 34 (2021) 5669\u20135681."},{"key":"e_1_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2017.10.013"},{"key":"e_1_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3236626"},{"key":"e_1_3_2_36_2","first-page":"348","volume-title":"Proceedings of the 2021 IEEE\/ACM 43rd International Conference on Software Engineering","author":"Guerriero Antonio","year":"2021","unstructured":"Antonio Guerriero, Roberto Pietrantuono, and Stefano Russo. 2021. Operation is the hardest teacher: estimating DNN accuracy looking for mispredictions. In Proceedings of the 2021 IEEE\/ACM 43rd International Conference on Software Engineering. IEEE Press, Madrid, Spain, 348\u2013358. DOI:10.1109\/ICSE43902.2021.00042"},{"key":"e_1_3_2_37_2","first-page":"1134","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision","author":"Guillory Devin","year":"2021","unstructured":"Devin Guillory, Vaishaal Shankar, Sayna Ebrahimi, Trevor Darrell, and Ludwig Schmidt. 2021. Predicting with confidence on unseen distributions. In Proceedings of the IEEE\/CVF international conference on computer vision. IEEE, Piscataway, NJ, USA, 1134\u20131144. DOI:10.1109\/ICCV48922.2021.00117"},{"key":"e_1_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-022-07812-2"},{"key":"e_1_3_2_39_2","first-page":"534","volume-title":"Proceedings of the IEEE International Conference on Software Analysis, Evolution and Reengineering","author":"Hao Yao","year":"2023","unstructured":"Yao Hao, Zhiqiu Huang, Hongjing Guo, and Guohua Shen. 2023. Test input selection for deep neural network enhancement based on multiple-objective optimization. In Proceedings of the IEEE International Conference on Software Analysis, Evolution and Reengineering. IEEE Computer Society, Los Alamitos, CA, USA, 534\u2013545. DOI:10.1109\/SANER56733.2023.00056"},{"key":"e_1_3_2_40_2","first-page":"768","volume-title":"Proceedings of the IEEE 22nd International Conference on Software Quality, Reliability, and Security Companion","author":"He Changtian","year":"2022","unstructured":"Changtian He, Qing Sun, Ji Wu, Haiyan Yang, and Tao Yue. 2022. Feature difference based misclassified sample detection for CNN models deployed in online environment. In Proceedings of the IEEE 22nd International Conference on Software Quality, Reliability, and Security Companion. IEEE, Piscataway, NJ, USA, 768\u2013769. DOI:10.1109\/QRS-C57518.2022.00126"},{"key":"e_1_3_2_41_2","first-page":"1","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Hendrycks Dan","year":"2019","unstructured":"Dan Hendrycks and Thomas Dietterich. 2019. Benchmarking neural network robustness to common corruptions and perturbations. In Proceedings of the International Conference on Learning Representations. OpenReview.net, Online, 1\u201316."},{"key":"e_1_3_2_42_2","first-page":"1","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Hendrycks Dan","year":"2017","unstructured":"Dan Hendrycks and Kevin Gimpel. 2017. A baseline for detecting misclassified and out-of-distribution dxamples in neural networks. In Proceedings of the International Conference on Learning Representations. OpenReview.net, Online, 1\u201312."},{"key":"e_1_3_2_43_2","unstructured":"Neil Houlsby Ferenc Husz\u00e1r Zoubin Ghahramani and M\u00e1t\u00e9 Lengyel. 2011. Bayesian active learning for classification and preference learning. arxiv:1112.5745.Retrieved from https:\/\/arxiv.org\/pdf\/1112.5745"},{"key":"e_1_3_2_44_2","first-page":"384","volume-title":"Proceedings of the IEEE International Conference on Computer Vision Workshops","author":"Hu Guosheng","year":"2015","unstructured":"Guosheng Hu, Yongxin Yang, Dong Yi, Josef Kittler, William Christmas, Stan Z. Li, and Timothy Hospedales. 2015. When face recognition meets with deep learning: an evaluation of convolutional neural networks for face recognition. In Proceedings of the IEEE International Conference on Computer Vision Workshops. IEEE Computer Society, Los Alamitos, CA, USA, 384\u2013392. DOI:10.1109\/ICCVW.2015.58"},{"key":"e_1_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3511598"},{"key":"e_1_3_2_46_2","first-page":"917","volume-title":"Proceedings of the 2021 36th IEEE\/ACM International Conference on Automated Software Engineering","author":"Hu Qiang","year":"2021","unstructured":"Qiang Hu, Yuejun Guo, Maxime Cordy, Xiaofei Xie, Wei Ma, Mike Papadakis, and Yves Le Traon. 2021. Towards exploring the limitations of active learning: an empirical study. In Proceedings of the 2021 36th IEEE\/ACM International Conference on Automated Software Engineering. IEEE, Piscataway, NJ, USA, 917\u2013929. DOI:10.1109\/ASE51524.2021.9678672"},{"key":"e_1_3_2_47_2","unstructured":"Qiang Hu Yuejun Guo Xiaofei Xie Maxime Cordy Wei Ma Mike Papadakis and Yves Le Traon. 2023. Evaluating the robustness of test selection methods for deep neural networks. arxiv:2308.01314.Retrieved from https:\/\/arxiv.org\/pdf\/2308.01314"},{"key":"e_1_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.1145\/3611666"},{"key":"e_1_3_2_49_2","first-page":"1776","volume-title":"Proceedings of the 45th International Conference on Software Engineering","author":"Hu Qiang","year":"2023","unstructured":"Qiang Hu, Yuejun Guo, Xiaofei Xie, Maxime Cordy, Mike Papadakis, Lei Ma, and Yves Le Traon. 2023. Aries: efficient testing of deep neural networks via labeling-free accuracy estimation. In Proceedings of the 45th International Conference on Software Engineering. IEEE Press, Piscataway, NJ, USA, 1776\u20131787. DOI:10.1109\/ICSE48619.2023.00152"},{"key":"e_1_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.cosrev.2020.100270"},{"key":"e_1_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/582415.582418"},{"key":"e_1_3_2_52_2","first-page":"5546","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","volume":"31","author":"Jiang Heinrich","year":"2018","unstructured":"Heinrich Jiang, Been Kim, Melody Guan, and Maya Gupta. 2018. To trust or not to trust a classifier. In Proceedings of the 32nd International Conference on Neural Information Processing Systems(Vol. 31). Curran Associates Inc., Red Hook, NY, USA, 5546\u20135557."},{"key":"e_1_3_2_53_2","unstructured":"Yiding Jiang Dilip Krishnan Hossein Mobahi and Samy Bengio. 2019. Predicting the generalization gap in deep networks with margin distributions. arxiv:1810.00113.Retrieved from https:\/\/arxiv.org\/pdf\/1810.00113"},{"key":"e_1_3_2_54_2","unstructured":"Yiding Jiang Vaishnavh Nagarajan Christina Baek and J Zico Kolter. 2022. Assessing generalization of SGD via disagreement. arxiv:2106.13799.Retrieved from https:\/\/arxiv.org\/pdf\/2106.13799"},{"key":"e_1_3_2_55_2","first-page":"1039","volume-title":"Proceedings of the 41st International Conference on Software Engineering","author":"Kim Jinhan","year":"2019","unstructured":"Jinhan Kim, Robert Feldt, and Shin Yoo. 2019. Guiding deep learning system testing using surprise adequacy. In Proceedings of the 41st International Conference on Software Engineering. IEEE Press, Montreal, Quebec, Canada, 1039\u20131049. DOI:10.1109\/ICSE.2019.00108"},{"key":"e_1_3_2_56_2","doi-asserted-by":"publisher","DOI":"10.1145\/3546947"},{"key":"e_1_3_2_57_2","first-page":"1466","volume-title":"Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"Kim Jinhan","year":"2020","unstructured":"Jinhan Kim, Jeongil Ju, Robert Feldt, and Shin Yoo. 2020. Reducing dnn labelling cost using surprise adequacy: An industrial case study for autonomous driving. In Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering. Association for Computing Machinery, New York, NY, USA, 1466\u20131476. DOI:10.1145\/3368089.3417065"},{"key":"e_1_3_2_58_2","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3237381"},{"key":"e_1_3_2_59_2","series-title":"Proceedings of Machine Learning Research","first-page":"5637","volume-title":"Proceedings of the 38th International Conference on Machine Learning","volume":"139","author":"Koh Pang Wei","year":"2021","unstructured":"Pang Wei Koh, Shiori Sagawa, Henrik Marklund, Sang Michael Xie, Marvin Zhang, Akshay Balsubramani, Weihua Hu, Michihiro Yasunaga, Richard Lanas Phillips, Irena Gao, Tony Lee, Etienne David, Ian Stavness, Wei Guo, Berton Earnshaw, Imran Haque, Sara M. Beery, Jure Leskovec, Anshul Kundaje, Emma Pierson, Sergey Levine, Chelsea Finn, and Percy Liang. 2021. WILDS: a benchmark of in-the-wild distribution shifts. In Proceedings of the 38th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, Brookline, MA, USA, 5637\u20135664. Retrieved from https:\/\/proceedings.mlr.press\/v139\/koh21a.html"},{"key":"e_1_3_2_60_2","first-page":"1137","volume-title":"Proceedings of the 14th International Joint Conference on Artificial Intelligence - Volume 2","author":"Kohavi Ron","year":"1995","unstructured":"Ron Kohavi et\u00a0al. 1995. A study of cross-validation and bootstrap for accuracy estimation and model selection. In Proceedings of the 14th International Joint Conference on Artificial Intelligence - Volume 2. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 1137\u20131143."},{"key":"e_1_3_2_61_2","first-page":"5753","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Kossen Jannik","year":"2021","unstructured":"Jannik Kossen, Sebastian Farquhar, Yarin Gal, and Tom Rainforth. 2021. Active testing: Sample-efficient model evaluation. In Proceedings of the International Conference on Machine Learning. PMLR, Brookline, MA, USA, 5753\u20135763."},{"key":"e_1_3_2_62_2","unstructured":"Jannik Kossen Sebastian Farquhar Yarin Gal and Thomas Rainforth. 2022. Active surrogate estimators: An active learning approach to label-efficient model evaluation. In Advances in Neural Information Processing Systems (NeurIPS\u201922). 35 (2022) 24557\u201324570."},{"key":"e_1_3_2_63_2","unstructured":"Kimin Lee Honglak Lee Kibok Lee and Jinwoo Shin. 2018. Training confidence-calibrated classifiers for detecting out-of-distribution samples. arxiv:1711.09325.Retrieved from https:\/\/arxiv.org\/pdf\/1711.09325"},{"key":"e_1_3_2_64_2","unstructured":"Young-Woo Lee and Heung-Seok Chae. 2023. Selection of test samples to improve DNN test efficiency based on neuron clusters. Retrieved from https:\/\/papers.ssrn.com\/sol3\/papers.cfm?abstract_id=4399496Accessed on January 23rd 2024."},{"key":"e_1_3_2_65_2","unstructured":"Yu Li Muxi Chen Yannan Liu Daojing He and Qiang Xu. 2022. An empirical study on the efficacy of deep active learning for image classification. arxiv:2212.03088.Retrieved from https:\/\/arxiv.org\/pdf\/2212.03088"},{"key":"e_1_3_2_66_2","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1145\/3533767.3534408","volume-title":"Proceedings of the 31st ACM SIGSOFT International Symposium on Software Testing and Analysis","author":"Li Yu","year":"2022","unstructured":"Yu Li, Muxi Chen, and Qiang Xu. 2022. HybridRepair: towards annotation-efficient repair for deep learning models. In Proceedings of the 31st ACM SIGSOFT International Symposium on Software Testing and Analysis. Association for Computing Machinery, New York, NY, USA, 227\u2013238. DOI:10.1145\/3533767.3534408"},{"key":"e_1_3_2_67_2","first-page":"20874","volume-title":"Proceedings of the Advances in Neural Information Processing Systems - Volume 34","author":"Li Yu","year":"2021","unstructured":"Yu Li, Min Li, Qiuxia Lai, Yannan Liu, and Qiang Xu. 2021. Testrank: bringing order into unlabeled test instances for deep learning tasks. In Proceedings of the Advances in Neural Information Processing Systems - Volume 34. 20874\u201320886. Retrieved from https:\/\/proceedings.neurips.cc\/paper\/2021\/hash\/ae78510109d46b0a6eef9820a4ca95d6-Abstract.html"},{"key":"e_1_3_2_68_2","first-page":"842","volume-title":"Proceedings of the 2022 IEEE 22nd International Conference on Software Quality, Reliability and Security","author":"Li Yuechen","year":"2022","unstructured":"Yuechen Li, Hanyu Pei, Linzhi Huang, and Beibei Yin. 2022. A distance-based dynamic random testing strategy for natural language processing DNN models. In Proceedings of the 2022 IEEE 22nd International Conference on Software Quality, Reliability and Security. IEEE, Piscataway, NJ, USA, 842\u2013853. DOI:10.1109\/QRS57517.2022.00089"},{"key":"e_1_3_2_69_2","first-page":"693","volume-title":"Proceedings of the International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"Li Zeju","year":"2022","unstructured":"Zeju Li, Konstantinos Kamnitsas, Mobarakol Islam, Chen Chen, and Ben Glocker. 2022. Estimating model performance under domain shifts with class-specific confidence scores. In Proceedings of the International Conference on Medical Image Computing and Computer-Assisted Intervention. Springer, Berlin, Germany, 693\u2013703."},{"key":"e_1_3_2_70_2","first-page":"499","volume-title":"Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"Li Zenan","year":"2019","unstructured":"Zenan Li, Xiaoxing Ma, Chang Xu, Chun Cao, Jingwei Xu, and Jian L\u00fc. 2019. Boosting operational DNN testing efficiency through conditioning. In Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering. Association for Computing Machinery, New York, NY, USA, 499\u2013509. DOI:10.1145\/3338906.3338930"},{"key":"e_1_3_2_71_2","first-page":"1238","volume-title":"Proceedings of the 45th International Conference on Software Engineering","author":"Li Zongjie","year":"2023","unstructured":"Zongjie Li, Chaozheng Wang, Zhibo Liu, Haoxuan Wang, Dong Chen, Shuai Wang, and Cuiyun Gao. 2023. Cctest: testing and repairing code completion systems. In Proceedings of the 45th International Conference on Software Engineering. IEEE Press, Piscataway, NJ, USA, 1238\u20131250. DOI:10.1109\/ICSE48619.2023.00110"},{"key":"e_1_3_2_72_2","first-page":"598","volume-title":"Proceedings of the 44th International Conference on Software Engineering","author":"Liu Zixi","year":"2022","unstructured":"Zixi Liu, Yang Feng, Yining Yin, and Zhenyu Chen. 2022. DeepState: selecting test suites to enhance the robustness of recurrent neural networks. In Proceedings of the 44th International Conference on Software Engineering. Association for Computing Machinery, New York, NY, USA, 598\u2013609. DOI:10.1145\/3510003.3510231"},{"key":"e_1_3_2_73_2","unstructured":"Yuzhe Lu Zhenlin Wang Runtian Zhai Soheil Kolouri Joseph Campbell and Katia P. Sycara. 2023. Predicting out-of-distribution error with confidence optimal transport. In ICLR 2023 Workshop on Pitfalls of Limited Data and Computation for Trustworthy ML (Kigali Rwanda). OpenReview.net Online 1\u20138."},{"key":"e_1_3_2_74_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16449-1_66"},{"key":"e_1_3_2_75_2","unstructured":"Lei Ma Felix Juefei-Xu Minhui Xue Qiang Hu Sen Chen Bo Li Yang Liu Jianjun Zhao Jianxiong Yin and Simon See. 2018. Secure deep learning engineering: A software quality assurance perspective. arxiv:1810.04538.Retrieved from https:\/\/arxiv.org\/pdf\/1810.04538"},{"key":"e_1_3_2_76_2","first-page":"175","volume-title":"Proceedings of the 2018 26th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"Ma Shiqing","year":"2018","unstructured":"Shiqing Ma, Yingqi Liu, Wen-Chuan Lee, Xiangyu Zhang, and Ananth Grama. 2018. MODE: automated neural network model debugging via state differential analysis and input selection. In Proceedings of the 2018 26th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering. Association for Computing Machinery, New York, NY, USA, 175\u2013186. DOI:10.1145\/3236024.3236082"},{"key":"e_1_3_2_77_2","doi-asserted-by":"publisher","DOI":"10.1145\/3417330"},{"key":"e_1_3_2_78_2","first-page":"1467","volume-title":"Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"Ma Yu-Seung","year":"2021","unstructured":"Yu-Seung Ma, Shin Yoo, and Taeho Kim. 2021. Selecting test inputs for DNNs using differential testing with subspecialized model instances. In Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering. Association for Computing Machinery, New York, NY, USA, 1467\u20131470. DOI:10.1145\/3468264.3473131"},{"key":"e_1_3_2_79_2","first-page":"1","volume-title":"Proceedings of the Advances in Neural Information Processing Systems","volume":"17","author":"Madani Omid","year":"2004","unstructured":"Omid Madani, David Pennock, and Gary Flake. 2004. Co-validation: using model disagreement on unlabeled data to validate classification algorithms. In Proceedings of the Advances in Neural Information Processing Systems, Vol. 17. MIT Press, Vancouver, British Columbia, Canada, 1\u20138."},{"key":"e_1_3_2_80_2","first-page":"7047","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","author":"Malinin Andrey","year":"2018","unstructured":"Andrey Malinin and Mark Gales. 2018. Predictive uncertainty estimation via prior networks. In Proceedings of the 32nd International Conference on Neural Information Processing Systems. Curran Associates Inc., Red Hook, NY, USA, 7047\u20137058."},{"key":"e_1_3_2_81_2","first-page":"5419","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Maqueda Ana I.","year":"2018","unstructured":"Ana I. Maqueda, Antonio Loquercio, Guillermo Gallego, Narciso Garc\u00eda, and Davide Scaramuzza. 2018. Event-based vision meets deep learning on steering prediction for self-driving cars. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. IEEE Computer Society, Los Alamitos, CA, USA, 5419\u20135427. DOI:10.1109\/CVPR.2018.00568"},{"key":"e_1_3_2_82_2","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-021-24025-8"},{"key":"e_1_3_2_83_2","first-page":"279","volume-title":"Proceedings of the 2018 IEEE International Conference on Software Testing, Verification and Validation Workshops","author":"Masuda Satoshi","year":"2018","unstructured":"Satoshi Masuda, Kohichi Ono, Toshiaki Yasue, and Nobuhiro Hosokawa. 2018. A survey of software quality for machine learning applications. In Proceedings of the 2018 IEEE International Conference on Software Testing, Verification and Validation Workshops. IEEE, Piscataway, NJ, USA, 279\u2013284. DOI:10.1109\/ICSTW.2018.00061"},{"key":"e_1_3_2_84_2","doi-asserted-by":"crossref","DOI":"10.1201\/9781420049176","volume-title":"Recurrent Neural Networks: Design and Applications (1st ed.)","author":"Medsker Larry","year":"1999","unstructured":"Larry Medsker and Lakhmi C. Jain. 1999. Recurrent Neural Networks: Design and Applications (1st ed.). CRC Press, Inc., USA."},{"key":"e_1_3_2_85_2","first-page":"385","volume-title":"Proceedings of the IEEE\/ACM 43rd International Conference on Software Engineering","author":"Meng Linghan","year":"2021","unstructured":"Linghan Meng, Yanhui Li, Lin Chen, Zhi Wang, Di Wu, Yuming Zhou, and Baowen Xu. 2021. Measuring discrimination to boost comparative testing for multiple deep learning models. In Proceedings of the IEEE\/ACM 43rd International Conference on Software Engineering. IEEE, Piscataway, NJ, USA, 385\u2013396. DOI:10.1109\/ICSE43902.2021.00045"},{"key":"e_1_3_2_86_2","first-page":"76","volume-title":"Proceedings of the 48th Euromicro Conference on Software Engineering and Advanced Applications","author":"Mosin Vasilii","year":"2022","unstructured":"Vasilii Mosin, Miroslaw Staron, Darko Durisic, Francisco Gomes de Oliveira Neto, Sushant Kumar Pandey, and Ashok Chaitanya Koppisetty. 2022. Comparing input prioritization techniques for testing deep learning algorithms. In Proceedings of the 48th Euromicro Conference on Software Engineering and Advanced Applications. IEEE Computer Society, Los Alamitos, CA, USA, 76\u201383. DOI:10.1109\/SEAA56994.2022.00020"},{"key":"e_1_3_2_87_2","first-page":"624","volume-title":"Proceedings of the 9th International Conference on Dependable Systems and Their Applications","author":"Pan Zhonghao","year":"2022","unstructured":"Zhonghao Pan, Shan Zhou, Jianmin Wang, Jinbo Wang, Jiao Jia, and Yang Feng. 2022. Test case prioritization for deep neural networks. In Proceedings of the 9th International Conference on Dependable Systems and Their Applications. IEEE, Piscataway, NJ, USA, 624\u2013628. DOI:10.1109\/DSA56465.2022.00089"},{"key":"e_1_3_2_88_2","doi-asserted-by":"publisher","unstructured":"Kexin Pei Yinzhi Cao Junfeng Yang and Suman Jana. 2019. Deepxplore: automated whitebox testing of deep learning systems. 9 pages. DOI:10.1145\/3361566","DOI":"10.1145\/3361566"},{"key":"e_1_3_2_89_2","unstructured":"Ruchir Puri David S. Kung Geert Janssen Wei Zhang Giacomo Domeniconi Vladimir Zolotov Julian Dolby Jie Chen Mihir Choudhury Lindsey Decker et\u00a0al. 2021. Codenet: a large-scale ai for code dataset for learning a diversity of coding tasks. arxiv:2105.12655.Retrieved from https:\/\/arxiv.org\/pdf\/2105.12655"},{"key":"e_1_3_2_90_2","first-page":"8017","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"Qiu Xin","year":"2022","unstructured":"Xin Qiu and Risto Miikkulainen. 2022. Detecting misclassification errors in neural networks with a gaussian process model. In Proceedings of the AAAI Conference on Artificial Intelligence. Cambridge University Press, Cambridge, UK, 8017\u20138027."},{"key":"e_1_3_2_91_2","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-020-09881-0"},{"key":"e_1_3_2_92_2","doi-asserted-by":"publisher","DOI":"10.1145\/248233.248262"},{"key":"e_1_3_2_93_2","doi-asserted-by":"publisher","DOI":"10.1016\/0377-0427(87)90125-7"},{"key":"e_1_3_2_94_2","first-page":"2483","volume-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision","author":"Sensoy Murat","year":"2021","unstructured":"Murat Sensoy, Maryam Saleki, Simon Julier, Reyhan Aydogan, and John Reid. 2021. Misclassification risk and uncertainty quantification in deep classifiers. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. IEEE Computer Society, Los Alamitos, CA, USA, 2483\u20132491. DOI:10.1109\/WACV48630.2021.00253"},{"key":"e_1_3_2_95_2","first-page":"410","volume-title":"Proceedings of the 35th IEEE\/ACM International Conference on Automated Software Engineering","author":"Shen Weijun","year":"2021","unstructured":"Weijun Shen, Yanhui Li, Lin Chen, Yuanlei Han, Yuming Zhou, and Baowen Xu. 2021. Multiple-boundary clustering and prioritization to promote neural network retraining. In Proceedings of the 35th IEEE\/ACM International Conference on Automated Software Engineering. Association for Computing Machinery, New York, NY, USA, 410\u2013422. DOI:10.1145\/3324884.3416621"},{"key":"e_1_3_2_96_2","first-page":"157","volume-title":"Proceedings of the 2021 IEEE 21st International Conference on Software Quality, Reliability and Security","author":"Shi Ying","year":"2021","unstructured":"Ying Shi, Beibei Yin, Zheng Zheng, and Tiancheng Li. 2021. An empirical study on test case prioritization metrics for deep neural networks. In Proceedings of the 2021 IEEE 21st International Conference on Software Quality, Reliability and Security. IEEE, Piscataway, NJ, USA, 157\u2013166. DOI:10.1109\/QRS54544.2021.00027"},{"key":"e_1_3_2_97_2","doi-asserted-by":"publisher","unstructured":"Thibault Simonetto Salijona Dyrmishi Salah Ghamizi Maxime Cordy and Yves Le Traon. 2022. A unified framework for adversarial attack and defense in constrained feature space. In Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence (Mess Wien Vienna Austria). International Joint Conferences on Artificial Intelligence Organization 1313\u20131319. DOI:10.24963\/ijcai.2022\/183","DOI":"10.24963\/ijcai.2022\/183"},{"key":"e_1_3_2_98_2","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1145\/3377811.3380353","volume-title":"Proceedings of the ACM\/IEEE 42nd International Conference on Software Engineering","author":"Stocco Andrea","year":"2020","unstructured":"Andrea Stocco, Michael Weiss, Marco Calzana, and Paolo Tonella. 2020. Misbehaviour prediction for autonomous driving systems. In Proceedings of the ACM\/IEEE 42nd International Conference on Software Engineering. Association for Computing Machinery, New York, NY, USA, 359\u2013371. DOI:10.1145\/3377811.3380353"},{"key":"e_1_3_2_99_2","first-page":"11741","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"Sun Xiaoxiao","year":"2021","unstructured":"Xiaoxiao Sun, Yunzhong Hou, Weijian Deng, Hongdong Li, and Liang Zheng. 2021. Ranking models in unlabeled new environments. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. IEEE Computer Society, Los Alamitos, CA, USA, 11741\u201311751. DOI:10.1109\/ICCV48922.2021.01155"},{"key":"e_1_3_2_100_2","unstructured":"Xiaoxiao Sun Yunzhong Hou Hongdong Li and Liang Zheng. 2021. Label-free model evaluation with semi-structured dataset representations. arxiv:2112.00694.Retrieved from https:\/\/arxiv.org\/pdf\/2112.00694"},{"key":"e_1_3_2_101_2","first-page":"2818","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","author":"Szegedy Christian","year":"2016","unstructured":"Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jon Shlens, and Zbigniew Wojna. 2016. Rethinking the inception architecture for computer vision. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. IEEE, Piscataway, NJ, USA, 2818\u20132826. DOI:10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_102_2","first-page":"368","volume-title":"Proceedings of the International Conference on Advanced Data Mining and Applications","author":"Tao Yali","year":"2022","unstructured":"Yali Tao, Chuanqi Tao, Hongjing Guo, and Bohan Li. 2022. TPFL: test input prioritization for deep neural networks based on fault localization. In Proceedings of the International Conference on Advanced Data Mining and Applications. Springer, Berlin, Germany, 368\u2013383. DOI:10.1007\/978-3-031-22064-7_27"},{"key":"e_1_3_2_103_2","unstructured":"Thomas Unterthiner Daniel Keysers Sylvain Gelly Olivier Bousquet and Ilya Tolstikhin. 2021. Predicting neural network accuracy from weights. arxiv:2002.11448.Retrieved from https:\/\/arxiv.org\/pdf\/2002.11448"},{"key":"e_1_3_2_104_2","doi-asserted-by":"crossref","first-page":"8237","DOI":"10.18653\/v1\/2022.acl-long.566","volume-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","author":"Vazhentsev Artem","year":"2022","unstructured":"Artem Vazhentsev, Gleb Kuzmin, Artem Shelmanov, Akim Tsvigun, Evgenii Tsymbalov, Kirill Fedyanin, Maxim Panov, Alexander Panchenko, Gleb Gusev, Mikhail Burtsev, Manvel Avetisian, and Leonid Zhukov. 2022. Uncertainty estimation of transformer predictions for misclassification detection. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics, Dublin, Ireland, 8237\u20138252. DOI:10.18653\/v1\/2022.acl-long.566"},{"key":"e_1_3_2_105_2","doi-asserted-by":"crossref","first-page":"727","DOI":"10.1145\/3377811.3380379","volume-title":"Proceedings of the ACM\/IEEE 42nd International Conference on Software Engineering","author":"Wang Huiyan","year":"2020","unstructured":"Huiyan Wang, Jingwei Xu, Chang Xu, Xiaoxing Ma, and Jian Lu. 2020. Dissector: input validation for deep learning applications by crossing-layer dissection. In Proceedings of the ACM\/IEEE 42nd International Conference on Software Engineering. Association for Computing Machinery, New York, NY, USA, 727\u2013738. DOI:10.1145\/3377811.3380379"},{"key":"e_1_3_2_106_2","first-page":"300","volume-title":"Proceedings of the 2021 IEEE\/ACM 43rd International Conference on Software Engineering","author":"Wang Jingyi","year":"2021","unstructured":"Jingyi Wang, Jialuo Chen, Youcheng Sun, Xingjun Ma, Dongxia Wang, Jun Sun, and Peng Cheng. 2021. RobOT: robustness-oriented testing for deep learning systems. In Proceedings of the 2021 IEEE\/ACM 43rd International Conference on Software Engineering. IEEE, Piscataway, NJ, USA, 300\u2013311. DOI:10.1109\/ICSE43902.2021.00038"},{"issue":"1","key":"e_1_3_2_107_2","first-page":"012017","article-title":"Test input selection for deep neural networks","volume":"1693","author":"Wang Zhiyu","year":"2020","unstructured":"Zhiyu Wang, Sihan Xu, Xiangrui Cai, and Hua Ji. 2020. Test input selection for deep neural networks. Journal of Physics: Conference Series 1693, 1 (2020), 012017.","journal-title":"Journal of Physics: Conference Series"},{"key":"e_1_3_2_108_2","first-page":"397","volume-title":"Proceedings of the 2021 IEEE\/ACM 43rd International Conference on Software Engineering","author":"Wang Zan","year":"2021","unstructured":"Zan Wang, Hanmo You, Junjie Chen, Yingyi Zhang, Xuyuan Dong, and Wenbin Zhang. 2021. Prioritizing test inputs for deep neural networks via mutation analysis. In Proceedings of the 2021 IEEE\/ACM 43rd International Conference on Software Engineering. IEEE, Piscataway, NJ, USA, 397\u2013409. DOI:10.1109\/ICSE43902.2021.00046"},{"key":"e_1_3_2_109_2","first-page":"682","volume-title":"Proceedings of the IEEE 22nd International Conference on Software Quality, Reliability and Security","author":"Wei Zhengyuan","year":"2022","unstructured":"Zhengyuan Wei, Haipeng Wang, Imran Ashraf, and W. K. Chan. 2022. Predictive mutation analysis of test case prioritization for deep neural networks. In Proceedings of the IEEE 22nd International Conference on Software Quality, Reliability and Security. IEEE, Piscataway, NJ, USA, 682\u2013693. DOI:10.1109\/QRS57517.2022.00074"},{"key":"e_1_3_2_110_2","doi-asserted-by":"publisher","DOI":"10.1002\/0471704091"},{"key":"e_1_3_2_111_2","first-page":"17","volume-title":"Proceedings of the 2021 IEEE\/ACM 3rd International Workshop on Deep Learning for Testing and Testing for Deep Learning","author":"Weiss Michael","year":"2021","unstructured":"Michael Weiss, Rwiddhi Chakraborty, and Paolo Tonella. 2021. A review and refinement of surprise adequacy. In Proceedings of the 2021 IEEE\/ACM 3rd International Workshop on Deep Learning for Testing and Testing for Deep Learning. IEEE, Piscataway, NJ, USA, 17\u201324. DOI:10.1109\/DeepTest52559.2021.00009"},{"key":"e_1_3_2_112_2","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1145\/3533767.3534375","volume-title":"Proceedings of the 31st ACM SIGSOFT International Symposium on Software Testing and Analysis","author":"Weiss Michael","year":"2022","unstructured":"Michael Weiss and Paolo Tonella. 2022. Simple techniques work surprisingly well for neural network test prioritization and active learning (replicability study). In Proceedings of the 31st ACM SIGSOFT International Symposium on Software Testing and Analysis. Association for Computing Machinery, New York, NY, USA, 139\u2013150. DOI:10.1145\/3533767.3534375"},{"key":"e_1_3_2_113_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110955"},{"key":"e_1_3_2_114_2","first-page":"1","volume-title":"37th Conference on Neural Information Processing Systems","author":"Xie Renchunzi","year":"2023","unstructured":"Renchunzi Xie, Hongxin Wei, Yuzhou Cao, Lei Feng, and Bo An. 2023. On the importance of feature separability in predicting out-of-distribution error. In Proceedings of the37th Conference on Neural Information Processing Systems. OpenReview.net, Online, 1\u201318."},{"key":"e_1_3_2_115_2","first-page":"146","volume-title":"Proceedings of the 28th ACM SIGSOFT International Symposium on Software Testing and Analysis","author":"Xie Xiaofei","year":"2019","unstructured":"Xiaofei Xie, Lei Ma, Felix Juefei-Xu, Minhui Xue, Hongxu Chen, Yang Liu, Jianjun Zhao, Bo Li, Jianxiong Yin, and Simon See. 2019. Deephunter: a coverage-guided fuzz testing framework for deep neural networks. In Proceedings of the 28th ACM SIGSOFT International Symposium on Software Testing and Analysis. Association for Computing Machinery, New York, NY, USA, 146\u2013157. DOI:10.1145\/3293882.3330579"},{"key":"e_1_3_2_116_2","volume-title":"Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering","author":"Xie Xiaoyuan","year":"2022","unstructured":"Xiaoyuan Xie, Pengbo Yin, and Songqiang Chen. 2022. Boosting the revealing of detected violations in deep learning testing: a diversity-guided method. In Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering. Association for Computing Machinery, New York, NY, USA, Article 17, 13 pages. DOI:10.1145\/3551349.3556919"},{"key":"e_1_3_2_117_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.scico.2021.102761"},{"key":"e_1_3_2_118_2","unstructured":"Zhou Yang Jieke Shi Muhammad Hilmi Asyrofi Bowen Xu Xin Zhou DongGyun Han and David Lo. 2023. Prioritizing speech test cases. arxiv:2302.00330.Retrieved from https:\/\/arxiv.org\/pdf\/2302.00330"},{"key":"e_1_3_2_119_2","first-page":"25721","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Yu Yaodong","year":"2022","unstructured":"Yaodong Yu, Zitong Yang, Alexander Wei, Yi Ma, and Jacob Steinhardt. 2022. Predicting out-of-distribution error with the projection norm. In Proceedings of the International Conference on Machine Learning. PMLR, Brookline, MA, USA, 25721\u201325746."},{"key":"e_1_3_2_120_2","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1145\/2619239.2631434","volume-title":"Proceedings of the 2014 ACM conference on SIGCOMM","author":"Yuan Zhenlong","year":"2014","unstructured":"Zhenlong Yuan, Yongqiang Lu, Zhaoguo Wang, and Yibo Xue. 2014. Droid-sec: deep learning in android malware detection. In Proceedings of the 2014 ACM conference on SIGCOMM. Association for Computing Machinery, New York, NY, USA, 371\u2013372. DOI:10.1145\/2740070.2631434"},{"key":"e_1_3_2_121_2","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2019.2962027"},{"key":"e_1_3_2_122_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2022.106982"},{"key":"e_1_3_2_123_2","first-page":"1","volume-title":"Proceedings of the 38th IEEE\/ACM International Conference on Automated Software Engineering","author":"Zheng Haibin","year":"2023","unstructured":"Haibin Zheng, Jinyin Chen, and Haibo Jin. 2023. CertPri: certifiable prioritization for deep neural networks via movement cost in feature space. In Proceedings of the 38th IEEE\/ACM International Conference on Automated Software Engineering. IEEE Computer Society, Los Alamitos, CA, USA, 1\u201313. DOI:10.1109\/ASE56229.2023.00126"},{"key":"e_1_3_2_124_2","first-page":"772","volume-title":"Proceedings of the 34th IEEE\/ACM International Conference on Automated Software Engineering","author":"Zheng Yan","year":"2019","unstructured":"Yan Zheng, Xiaofei Xie, Ting Su, Lei Ma, Jianye Hao, Zhaopeng Meng, Yang Liu, Ruimin Shen, Yingfeng Chen, and Changjie Fan. 2019. Wuji: automatic online combat game testing using evolutionary deep reinforcement learning. In Proceedings of the 34th IEEE\/ACM International Conference on Automated Software Engineering. IEEE, Piscataway, NJ, USA, 772\u2013784. DOI:10.1109\/ASE.2019.00077"},{"key":"e_1_3_2_125_2","first-page":"289","volume-title":"Proceedings of the IEEE 31st International Symposium on Software Reliability Engineering","author":"Zhou Jianyi","year":"2020","unstructured":"Jianyi Zhou, Feng Li, Jinhao Dong, Hongyu Zhang, and Dan Hao. 2020. Cost-effective testing of a deep learning model through input reduction. In Proceedings of the IEEE 31st International Symposium on Software Reliability Engineering. IEEE Computer Society, Los Alamitos, CA, USA, 289\u2013300. DOI:10.1109\/ISSRE5003.2020.00035"},{"key":"e_1_3_2_126_2","first-page":"518","volume-title":"Proceedings of the Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXV","author":"Zhu Fei","year":"2022","unstructured":"Fei Zhu, Zhen Cheng, Xu-Yao Zhang, and Cheng-Lin Liu. 2022. Rethinking confidence calibration for failure prediction. In Proceedings of the Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXV. Springer, Berlin, Germany, 518\u2013536. Retrieved from https:\/\/www.ecva.net\/papers\/eccv_2022\/papers_ECCV\/papers\/136850512.pdf"},{"key":"e_1_3_2_127_2","first-page":"12074","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Zhu Fei","year":"2023","unstructured":"Fei Zhu, Zhen Cheng, Xu-Yao Zhang, and Cheng-Lin Liu. 2023. OpenMix: exploring outlier samples for misclassification detection. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. IEEE Computer Society, Los Alamitos, CA, USA, 12074\u201312083. DOI:10.1109\/CVPR52729.2023.01162"},{"key":"e_1_3_2_128_2","doi-asserted-by":"publisher","DOI":"10.3390\/app13085056"}],"container-title":["ACM Transactions on Software Engineering and Methodology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643678","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643678","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:05:33Z","timestamp":1750291533000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643678"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,20]]},"references-count":127,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,5,31]]}},"alternative-id":["10.1145\/3643678"],"URL":"https:\/\/doi.org\/10.1145\/3643678","relation":{},"ISSN":["1049-331X","1557-7392"],"issn-type":[{"value":"1049-331X","type":"print"},{"value":"1557-7392","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4,20]]},"assertion":[{"value":"2023-10-19","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-01-17","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2024-04-20","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}