{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T16:51:57Z","timestamp":1773247917695,"version":"3.50.1"},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2021,6,18]],"date-time":"2021-06-18T00:00:00Z","timestamp":1623974400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,6,18]],"date-time":"2021-06-18T00:00:00Z","timestamp":1623974400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2021,9]]},"DOI":"10.1007\/s10664-021-09985-1","type":"journal-article","created":{"date-parts":[[2021,6,18]],"date-time":"2021-06-18T14:02:41Z","timestamp":1624024961000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["To what extent do DNN-based image classification models make unreliable inferences?"],"prefix":"10.1007","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1644-2965","authenticated-orcid":false,"given":"Yongqiang","family":"Tian","sequence":"first","affiliation":[]},{"given":"Shiqing","family":"Ma","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5588-9618","authenticated-orcid":false,"given":"Ming","family":"Wen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8147-8126","authenticated-orcid":false,"given":"Yepang","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3508-7172","authenticated-orcid":false,"given":"Shing-Chi","family":"Cheung","sequence":"additional","affiliation":[]},{"given":"Xiangyu","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,18]]},"reference":[{"key":"9985_CR1","doi-asserted-by":"publisher","unstructured":"Aggarwal A, Lohia P, Nagar S, Dey K, Saha D (2019) Black box fairness testing of machine learning models. In: Proceedings of the 2019 27th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering, association for computing machinery, ESEC\/FSE 2019, New York, NY, USA, pp 625\u2013635. https:\/\/doi.org\/10.1145\/3338906.3338937","DOI":"10.1145\/3338906.3338937"},{"issue":"5","key":"9985_CR2","doi-asserted-by":"publisher","first-page":"507","DOI":"10.1109\/TSE.2014.2372785","volume":"41","author":"ET Barr","year":"2015","unstructured":"Barr ET, Harman M, McMinn P, Shahbaz M, Yoo S (2015) The oracle problem in software testing: A survey. IEEE Trans Softw Eng 41 (5):507\u2013525","journal-title":"IEEE Trans Softw Eng"},{"key":"9985_CR3","unstructured":"Ben-Baruch E, Ridnik T, Zamir N, Noy A, Friedman I, Protter M, Zelnik-Manor L (2020) Asymmetric loss for multi-label classification. arXiv:2009.14119"},{"key":"9985_CR4","doi-asserted-by":"crossref","unstructured":"Benesty J, Chen J, Huang Y, Cohen I (2009) Pearson correlation coefficient. In: Noise reduction in speech processing. Springer, pp 1\u20134","DOI":"10.1007\/978-3-642-00296-0_5"},{"key":"9985_CR5","doi-asserted-by":"publisher","unstructured":"Carlini N, Wagner DA (2017) Towards evaluating the robustness of neural networks. In: 2017 IEEE symposium on security and privacy, SP 2017, May 22-26, 2017. IEEE Computer Society, San Jose, CA, USA, pp 39\u201357. https:\/\/doi.org\/10.1109\/SP.2017.49","DOI":"10.1109\/SP.2017.49"},{"key":"9985_CR6","unstructured":"Chen TY, Cheung SC, Yiu SM (1998) Metamorphic testing: a new approach for generating next test cases. Tech. Rep. HKUST-CS98-01 Department of Computer Science, Hong Kong University of Science and Technology, Hong Kong"},{"issue":"1","key":"9985_CR7","doi-asserted-by":"publisher","first-page":"4:1","DOI":"10.1145\/3143561","volume":"51","author":"TY Chen","year":"2018","unstructured":"Chen TY, Kuo FC, Liu H, Poon PL, Towey D, Tse TH, Zhou ZQ (2018) Metamorphic testing: A review of challenges and opportunities. ACM Comput Surv 51(1):4:1\u20134:27. https:\/\/doi.org\/10.1145\/3143561","journal-title":"ACM Comput Surv"},{"key":"9985_CR8","unstructured":"Chollet F, et al. (2015a) Keras. https:\/\/keras.io"},{"key":"9985_CR9","unstructured":"Chollet F, et al. (2015b) Keras applications. https:\/\/keras.io\/api\/applications\/"},{"key":"9985_CR10","volume-title":"Sampling techniques","author":"W Cochran","year":"1963","unstructured":"Cochran W (1963) Sampling techniques, 2nd edn. [Wiley Publications in Statistics.], John Wiley & Sons, New York","edition":"2"},{"key":"9985_CR11","volume-title":"Mathematical methods of statistics","author":"H Cramer","year":"1946","unstructured":"Cramer H (1946) Mathematical methods of statistics. Princeton University Press, Princeton"},{"key":"9985_CR12","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) ImageNet: A large-scale hierarchical image database. In: CVPR09","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"9985_CR13","doi-asserted-by":"publisher","unstructured":"Devlin J, Chang M, Lee K, Toutanova K (2019) BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein J, Doran C, Solorio T (eds) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2-7, 2019, Volume 1 (Long and Short Papers), Association for Computational Linguistics, pp 4171\u20134186. https:\/\/doi.org\/10.18653\/v1\/n19-1423","DOI":"10.18653\/v1\/n19-1423"},{"key":"9985_CR14","doi-asserted-by":"publisher","unstructured":"Ding J, Kang X, Hu X (2017) Validating a deep learning framework by metamorphic testing. In: 2017 IEEE\/ACM 2nd international workshop on metamorphic testing (MET), pp 28\u201334. https:\/\/doi.org\/10.1109\/MET.2017.2","DOI":"10.1109\/MET.2017.2"},{"key":"9985_CR15","doi-asserted-by":"publisher","unstructured":"Dwarakanath A, Ahuja M, Sikand S, Rao RM, Bose RPJC, Dubash N, Podder S (2018) Identifying implementation bugs in machine learning based image classifiers using metamorphic testing. In: Proceedings of the 27th ACM SIGSOFT international symposium on software testing and analysis, ISSTA 2018. ACM, New York, NY, USA, pp 118\u2013128. https:\/\/doi.org\/10.1145\/3213846.3213858","DOI":"10.1145\/3213846.3213858"},{"key":"9985_CR16","doi-asserted-by":"crossref","unstructured":"Fahmy H, Pastore F, Bagherzadeh M, Briand L (2020) Supporting dnn safety analysis and retraining through heatmap-based unsupervised learning. arXiv:2002.00863","DOI":"10.1109\/TR.2021.3074750"},{"key":"9985_CR17","doi-asserted-by":"publisher","unstructured":"Fellbaum C (2006) Wordnet(s). In: Brown K (ed) Encyclopedia of language & linguistics. 2nd edn. Elsevier, Oxford, pp 665\u2013670. https:\/\/doi.org\/10.1016\/B0-08-044854-2\/00946-9http:\/\/www.sciencedirect.com\/science\/article\/pii\/B0080448542009469","DOI":"10.1016\/B0-08-044854-2\/00946-9"},{"key":"9985_CR18","doi-asserted-by":"crossref","unstructured":"Freund Y, Schapire RE (1995) A desicion-theoretic generalization of on-line learning and an application to boosting. In: Vit\u00e1nyi P (ed) Theory, computational learning. Springer Berlin Heidelberg, Berlin, Heidelberg, pp 23\u201337","DOI":"10.1007\/3-540-59119-2_166"},{"issue":"302","key":"9985_CR19","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1080\/14786440009463897","volume":"50","author":"KP FRS","year":"1900","unstructured":"FRS KP (1900) X. on the criterion that a given system of deviations from the probable in the case of a correlated system of variables is such that it can be reasonably supposed to have arisen from random sampling. Lond Edinb Dublin Philos Mag J Sci 50(302):157\u2013175. https:\/\/doi.org\/10.1080\/14786440009463897","journal-title":"Lond Edinb Dublin Philos Mag J Sci"},{"key":"#cr-split#-9985_CR20.1","unstructured":"Geirhos R, Rubisch P, Michaelis C, Bethge M, Wichmann FA, Brendel W (2019) Imagenet-trained cnns are biased towards texture"},{"key":"#cr-split#-9985_CR20.2","unstructured":"increasing shape bias improves accuracy and robustness. In: 7th International conference on learning representations, ICLR 2019, May 6-9, 2019, OpenReview.net, New Orleans, LA, USA. https:\/\/openreview.net\/forum?id=Bygh9j09KX"},{"key":"9985_CR21","doi-asserted-by":"publisher","first-page":"47230","DOI":"10.1109\/ACCESS.2019.2909068","volume":"7","author":"T Gu","year":"2019","unstructured":"Gu T, Liu K, Dolan-Gavitt B, Garg S (2019) Badnets: Evaluating backdooring attacks on deep neural networks. IEEE Access 7:47230\u201347244. https:\/\/doi.org\/10.1109\/ACCESS.2019.2909068","journal-title":"IEEE Access"},{"key":"9985_CR22","doi-asserted-by":"publisher","unstructured":"Guo J, Jiang Y, Zhao Y, Chen Q, Sun J (2018) Dlfuzz: Differential fuzzing testing of deep learning systems. In: Proceedings of the 2018 26th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering, association for computing machinery, ESEC\/FSE 2018, New York, NY, USA, pp 739\u2013743. https:\/\/doi.org\/10.1145\/3236024.3264835","DOI":"10.1145\/3236024.3264835"},{"key":"9985_CR23","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"9985_CR24","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv:1704.04861"},{"key":"9985_CR25","doi-asserted-by":"publisher","unstructured":"Huang G, Liu Z, van der Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: 2017 IEEE conference on computer vision and pattern recognition, CVPR 2017, July 21-26, 2017. IEEE Computer Society, Honolulu, HI, USA, pp 2261\u20132269. https:\/\/doi.org\/10.1109\/CVPR.2017.243","DOI":"10.1109\/CVPR.2017.243"},{"key":"9985_CR26","unstructured":"Krasin I, Duerig T, Alldrin N, Ferrari V, Abu-El-Haija S, Kuznetsova A, Rom H, Uijlings J, Popov S, Kamali S, Malloci M, Pont-Tuset J, Veit A, Belongie S, Gomes V, Gupta A, Sun C, Chechik G, Cai D, Feng Z, Narayanan D, Murphy K (2017) Openimages: A public dataset for large-scale multi-label and multi-class image classification. Dataset available from https:\/\/storagegoogleapiscom\/openimages\/web\/indexhtml"},{"key":"9985_CR27","unstructured":"Krizhevsky A, Nair V, Hinton G (2009) The cifar-10 dataset. http:\/\/www.cs.toronto.edu\/~kriz\/cifar.html"},{"key":"9985_CR28","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Pereira F, Burges CJC, Bottou L, Weinberger KQ (eds) Advances in neural information processing systems 25. Curran Associates, Inc., pp 1097\u20131105. http:\/\/papers.nips.cc\/paper\/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf"},{"issue":"1","key":"9985_CR29","doi-asserted-by":"publisher","first-page":"159","DOI":"10.2307\/2529310","volume":"33","author":"JR Landis","year":"1977","unstructured":"Landis JR, Koch GG (1977) The measurement of observer agreement for categorical data. Biometrics 33(1):159\u2013174","journal-title":"Biometrics"},{"key":"9985_CR30","unstructured":"LeCun Y, Cortes C (2010) MNIST handwritten digit database. http:\/\/yann.lecun.com\/exdb\/mnist\/"},{"key":"9985_CR31","doi-asserted-by":"crossref","unstructured":"Lin T, Maire M, Belongie SJ, Bourdev LD, Girshick RB, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft COCO: common objects in context. arXiv:1405.0312","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"9985_CR32","doi-asserted-by":"crossref","unstructured":"Lin Y, Lv F, Zhu S, Yang M, Cour T, Yu K, Cao L, Huang T (2011) Large-scale image classification: Fast feature extraction and svm training. In: CVPR 2011, pp 1689\u20131696","DOI":"10.1109\/CVPR.2011.5995477"},{"key":"9985_CR33","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu CY, Berg AC (2016) Ssd: Single shot multibox detector. In: Leibe B, Matas J, Sebe N, Welling M (eds) Computer vision \u2013 ECCV 2016. Springer International Publishing, Cham, pp 21\u201337","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"9985_CR34","doi-asserted-by":"publisher","unstructured":"Ma L, Juefei-Xu F, Zhang F, Sun J, Xue M, Li B, Chen C, Su T, Li L, Liu Y, Zhao J, Wang Y (2018a) Deepgauge: Multi-granularity testing criteria for deep learning systems. In: Proceedings of the 33rd ACM\/IEEE international conference on automated software engineering, ASE 2018. ACM, New York, NY, USA, pp 120\u2013131. https:\/\/doi.org\/10.1145\/3238147.3238202","DOI":"10.1145\/3238147.3238202"},{"key":"9985_CR35","doi-asserted-by":"publisher","unstructured":"Ma L, Zhang F, Sun J, Xue M, Li B, Juefei-Xu F, Xie C, Li L, Liu Y, Zhao J, Wang Y (2018b) Deepmutation: Mutation testing of deep learning systems. In: Ghosh S, Natella R, Cukic B, Poston R, Laranjeiro N (eds) 29th IEEE international symposium on software reliability engineering, ISSRE 2018, October 15-18, 2018. IEEE Computer Society, Memphis, TN, USA, pp 100\u2013111. https:\/\/doi.org\/10.1109\/ISSRE.2018.00021","DOI":"10.1109\/ISSRE.2018.00021"},{"key":"9985_CR36","doi-asserted-by":"publisher","unstructured":"Ma S, Liu Y, Lee WC, Zhang X, Grama A (2018c) Mode: Automated neural network model debugging via state differential analysis and input selection. In: Proceedings of the 2018 26th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering, association for computing machinery, ESEC\/FSE 2018, New York, NY, USA, pp 175\u2013186. https:\/\/doi.org\/10.1145\/3236024.3236082","DOI":"10.1145\/3236024.3236082"},{"key":"9985_CR37","doi-asserted-by":"crossref","unstructured":"Montavon G, Binder A, Lapuschkin S, Samek W, M\u00fcller KR (2019) Layer-wise relevance propagation: an overview. In: Explainable AI: interpreting, explaining and visualizing deep learning. Springer, pp 193\u2013209","DOI":"10.1007\/978-3-030-28954-6_10"},{"key":"9985_CR38","doi-asserted-by":"publisher","unstructured":"Moosavi-Dezfooli S, Fawzi A, Frossard P (2016) Deepfool: A simple and accurate method to fool deep neural networks. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 2574\u20132582. https:\/\/doi.org\/10.1109\/CVPR.2016.282","DOI":"10.1109\/CVPR.2016.282"},{"key":"9985_CR39","doi-asserted-by":"publisher","unstructured":"Nejadgholi M, Yang J (2019) A study of oracle approximations in testing deep learning libraries. In: 2019 34th IEEE\/ACM international conference on automated software engineering (ASE), pp 785\u2013796. https:\/\/doi.org\/10.1109\/ASE.2019.00078","DOI":"10.1109\/ASE.2019.00078"},{"key":"9985_CR40","unstructured":"Odena A, Olsson C, Andersen D, Goodfellow IJ (2019) Tensorfuzz: Debugging neural networks with coverage-guided fuzzing. In: Chaudhuri K, Salakhutdinov R (eds) Proceedings of the 36th international conference on machine learning, ICML 2019, 9-15 June 2019, Long Beach, California, USA, PMLR, Proceedings of machine learning research, vol 97, pp 4901\u20134911. http:\/\/proceedings.mlr.press\/v97\/odena19a.html"},{"key":"9985_CR41","doi-asserted-by":"publisher","unstructured":"Pei K, Cao Y, Yang J, Jana S (2017) Deepxplore: Automated whitebox testing of deep learning systems. In: Proceedings of the 26th symposium on operating systems principles, SOSP \u201917. ACM, New York, NY, USA, pp 1\u201318. https:\/\/doi.org\/10.1145\/3132747.3132785","DOI":"10.1145\/3132747.3132785"},{"key":"9985_CR42","doi-asserted-by":"publisher","unstructured":"Pham HV, Lutellier T, Qi W, Tan L (2019) CRADLE: cross-backend validation to detect and localize bugs in deep learning libraries. In: Proceedings of the 41st international conference on software engineering, ICSE \u201919. IEEE Press, pp 1027\u20131038. https:\/\/doi.org\/10.1109\/ICSE.2019.00107","DOI":"10.1109\/ICSE.2019.00107"},{"key":"9985_CR43","doi-asserted-by":"publisher","unstructured":"Qin G, Vrusias B, Gillam L (2010) Background filtering for improving of object detection in images. In: 2010 20th international conference on pattern recognition, pp 922\u2013925. https:\/\/doi.org\/10.1109\/ICPR.2010.231","DOI":"10.1109\/ICPR.2010.231"},{"key":"9985_CR44","doi-asserted-by":"publisher","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: Unified, real-time object detection. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 779\u2013788. https:\/\/doi.org\/10.1109\/CVPR.2016.91","DOI":"10.1109\/CVPR.2016.91"},{"issue":"6","key":"9985_CR45","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren S, He K, Girshick R, Sun J (2017) Faster r-cnn: Towards real-time object detection with region proposal networks. IEEE Trans Pattern Anal Mach Intell 39(6):1137\u20131149. https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9985_CR46","doi-asserted-by":"crossref","unstructured":"Ribeiro MT, Singh S, Guestrin C (2016) \u201cwhy should I trust you?\u201d: Explaining the predictions of any classifier. In: Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, August 13-17, 2016, San Francisco, CA, USA, pp 1135\u20131144","DOI":"10.1145\/2939672.2939778"},{"key":"9985_CR47","doi-asserted-by":"publisher","unstructured":"Roobaert D, Zillich M, Eklundh J (2001) A pure learning approach to background-invariant object recognition using pedagogical support vector learning. In: Proceedings of the 2001 IEEE computer society conference on computer vision and pattern recognition. CVPR 2001, vol 2, pp II\u2013II. https:\/\/doi.org\/10.1109\/CVPR.2001.990982","DOI":"10.1109\/CVPR.2001.990982"},{"key":"9985_CR48","unstructured":"Rosenfeld A, Zemel RS, Tsotsos JK (2018) The elephant in the room. arXiv:1808.03305"},{"key":"9985_CR49","doi-asserted-by":"publisher","unstructured":"Sanchez J, Perronnin F (2011) High-dimensional signature compression for large-scale image classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition, CVPR \u201911. IEEE Computer Society, USA, pp 1665\u20131672. https:\/\/doi.org\/10.1109\/CVPR.2011.5995504","DOI":"10.1109\/CVPR.2011.5995504"},{"key":"9985_CR50","doi-asserted-by":"publisher","unstructured":"Selvaraju RR, Cogswell M, Das A, Vedantam R, Parikh D, Batra D (2017) Grad-cam: Visual explanations from deep networks via gradient-based localization. In: IEEE international conference on computer vision, ICCV 2017, October 22-29, 2017. IEEE Computer Society, Venice, Italy, pp 618\u2013626. https:\/\/doi.org\/10.1109\/ICCV.2017.74","DOI":"10.1109\/ICCV.2017.74"},{"key":"9985_CR51","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. In: Bengio Y, LeCun Y (eds) 3rd international conference on learning representations, ICLR 2015, May 7-9, 2015, conference track proceedings, San Diego, CA, USA"},{"key":"9985_CR52","doi-asserted-by":"publisher","unstructured":"Stock P, Ciss\u00e9 M (2018) Convnets and imagenet beyond accuracy: Understanding mistakes and uncovering biases. In: Ferrari V, Hebert M, Sminchisescu C, Weiss Y (eds) Computer Vision - ECCV 2018 - 15th european conference, September 8-14, 2018, Proceedings, Part VI, Lecture Notes in Computer Science, vol 11210. Springer, Munich, Germany, pp 504\u2013519. https:\/\/doi.org\/10.1007\/978-3-030-01231-1_31","DOI":"10.1007\/978-3-030-01231-1_31"},{"key":"9985_CR53","doi-asserted-by":"publisher","unstructured":"Tian Y, Pei K, Jana S, Ray B (2018) Deeptest: Automated testing of deep-neural-network-driven autonomous cars. In: Proceedings of the 40th international conference on software engineering, ICSE \u201918. ACM, New York, NY, USA, pp 303\u2013314. https:\/\/doi.org\/10.1145\/3180155.3180220","DOI":"10.1145\/3180155.3180220"},{"key":"9985_CR54","doi-asserted-by":"publisher","unstructured":"Tian Y, Zeng Z, Wen M, Liu Y, Kuo Ty, Cheung SC (2020a) Evaldnn: A toolbox for evaluating deep neural network models. In: Proceedings of the ACM\/IEEE 42nd international conference on software engineering: companion proceedings, association for computing machinery, ICSE \u201920, New York, NY, USA, pp 45\u201348. https:\/\/doi.org\/10.1145\/3377812.3382133","DOI":"10.1145\/3377812.3382133"},{"key":"9985_CR55","doi-asserted-by":"publisher","unstructured":"Tian Y, Zhong Z, Ordonez V, Kaiser G, Ray B (2020b) Testing dnn image classifiers for confusion & bias errors. In: Proceedings of the ACM\/IEEE 42nd international conference on software engineering, association for computing machinery, ICSE \u201920, New York, NY, USA, pp 1122\u20131134. https:\/\/doi.org\/10.1145\/3377811.3380400","DOI":"10.1145\/3377811.3380400"},{"key":"9985_CR56","doi-asserted-by":"publisher","unstructured":"Tram\u00e8r F, Atlidakis V, Geambasu R, Hsu D, Hubaux J, Humbert M, Juels A, Lin H (2017) Fairtest: Discovering unwarranted associations in data-driven applications. In: 2017 IEEE european symposium on security and privacy (EuroS P), pp 401\u2013416. https:\/\/doi.org\/10.1109\/EuroSP.2017.29","DOI":"10.1109\/EuroSP.2017.29"},{"key":"9985_CR57","doi-asserted-by":"publisher","unstructured":"Wang S, Su Z (2020) Metamorphic object insertion for testing object detection systems. In: Proceedings of the 35th ACM\/IEEE international conference on automated software engineering, ASE 2020. ACM, New York, NY, USA, pp 1053\u20131065. https:\/\/doi.org\/10.1145\/3324884.3416584","DOI":"10.1145\/3324884.3416584"},{"issue":"6","key":"9985_CR58","doi-asserted-by":"publisher","first-page":"80","DOI":"10.2307\/3001968","volume":"1","author":"F Wilcoxon","year":"1945","unstructured":"Wilcoxon F (1945) Individual comparisons by ranking methods. Biom Bull 1(6):80\u201383","journal-title":"Biom Bull"},{"key":"9985_CR59","unstructured":"Wu G, Zhu J (2020) Multi-label classification: do hamming loss and subset accuracy really conflict with each other? In: Larochelle H, Ranzato M, Hadsell R, Balcan M, Lin H (eds) Advances in neural information processing systems 33: Annual conference on neural information processing systems 2020, NeurIPS 2020, December 6-12, 2020, virtual. https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/20479c788fb27378c2c99eadcf207e7f-Abstract.html"},{"issue":"4","key":"9985_CR60","doi-asserted-by":"publisher","first-page":"544","DOI":"10.1016\/j.jss.2010.11.920","volume":"84","author":"X Xie","year":"2011","unstructured":"Xie X, Ho JW, Murphy C, Kaiser G, Xu B, Chen TY (2011) Testing and validating machine learning classifiers by metamorphic testing. J Syst Softw 84(4):544\u2013558, the Ninth International Conference on Quality Software. https:\/\/doi.org\/10.1016\/j.jss.2010.11.920http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0164121210003213","journal-title":"J Syst Softw"},{"key":"9985_CR61","doi-asserted-by":"publisher","unstructured":"Xie X, Ma L, Juefei-Xu F, Xue M, Chen H, Liu Y, Zhao J, Li B, Yin J, See S (2019a) Deephunter: a coverage-guided fuzz testing framework for deep neural networks. In: M\u00f8ller A, Zhang D (eds) Proceedings of the 28th ACM SIGSOFT international symposium on software testing and analysis, ISSTA 2019, July 15-19, 2019. ACM, Beijing, China, pp 146\u2013157. https:\/\/doi.org\/10.1145\/3293882.3330579","DOI":"10.1145\/3293882.3330579"},{"key":"9985_CR62","doi-asserted-by":"publisher","unstructured":"Xie X, Ma L, Wang H, Li Y, Liu Y, Li X (2019b) Diffchaser: Detecting disagreements for deep neural networks. In: Proceedings of the twenty-eighth international joint conference on artificial intelligence, IJCAI-19, International joint conferences on artificial intelligence organization, pp 5772\u20135778. https:\/\/doi.org\/10.24963\/ijcai.2019\/800","DOI":"10.24963\/ijcai.2019\/800"},{"key":"9985_CR63","doi-asserted-by":"publisher","unstructured":"Yu J, Lin Z, Yang J, Shen X, Lu X, Huang TS (2018) Generative image inpainting with contextual attention. In: 2018 IEEE conference on computer vision and pattern recognition, CVPR 2018, June 18-22, 2018. IEEE Computer Society, Salt Lake City, UT, USA, pp 5505\u20135514. https:\/\/doi.org\/10.1109\/CVPR.2018.00577","DOI":"10.1109\/CVPR.2018.00577"},{"key":"9985_CR64","doi-asserted-by":"publisher","unstructured":"Zhang JM, Harman M, Ma L, Liu Y (2020) Machine learning testing: Survey, landscapes and horizons. IEEE Trans Softw Eng, pp 1\u20131. https:\/\/doi.org\/10.1109\/TSE.2019.2962027","DOI":"10.1109\/TSE.2019.2962027"},{"key":"9985_CR65","doi-asserted-by":"publisher","unstructured":"Zhang M, Zhang Y, Zhang L, Liu C, Khurshid S (2018) Deeproad: Gan-based metamorphic testing and input validation framework for autonomous driving systems. In: Proceedings of the 33rd ACM\/IEEE international conference on automated software engineering, ASE 2018. ACM, New York, NY, USA, pp 132\u2013142. https:\/\/doi.org\/10.1145\/3238147.3238187","DOI":"10.1145\/3238147.3238187"},{"key":"9985_CR66","doi-asserted-by":"crossref","unstructured":"Zhang P, Wang J, Sun J, Dong G, Wang X, Wang X, Dong JS, Ting D (2020a) White-box fairness testing through adversarial sampling. In: Proceedings of the 42nd international conference on software engineering, association for computing machinery, ICSE \u201920, New York, NY, USA","DOI":"10.1145\/3377811.3380331"},{"key":"9985_CR67","doi-asserted-by":"publisher","unstructured":"Zhang X, Xie X, Ma L, Du X, Hu Q, Liu Y, Zhao J, Sun M (2020b) Towards characterizing adversarial defects of deep learning software from the lens of uncertainty. In: Proceedings of the ACM\/IEEE 42nd international conference on software engineering, association for computing machinery, ICSE \u201920, New York, NY, USA, pp 739\u2013751. https:\/\/doi.org\/10.1145\/3377811.3380368","DOI":"10.1145\/3377811.3380368"},{"key":"9985_CR68","doi-asserted-by":"crossref","unstructured":"Zhao J, Wang T, Yatskar M, Ordonez V, Chang KW (2017) Men also like shopping: Reducing gender bias amplification using corpus-level constraints. In: Proceedings of the 2017 conference on empirical methods in natural language processing, pp 2941\u20132951. https:\/\/www.aclweb.org\/anthology\/D17-1319","DOI":"10.18653\/v1\/D17-1323"},{"key":"9985_CR69","doi-asserted-by":"publisher","unstructured":"Zhou B, Khosla A, Lapedriza A, Oliva A, Torralba A (2016) Learning deep features for discriminative localization. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 2921\u20132929. https:\/\/doi.org\/10.1109\/CVPR.2016.319","DOI":"10.1109\/CVPR.2016.319"},{"issue":"3","key":"9985_CR70","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1145\/3241979","volume":"62","author":"ZQ Zhou","year":"2019","unstructured":"Zhou ZQ, Sun L (2019) Metamorphic testing of driverless cars. Commun ACM 62(3):61\u201367. https:\/\/doi.org\/10.1145\/3241979","journal-title":"Commun ACM"},{"key":"9985_CR71","doi-asserted-by":"publisher","unstructured":"Zoph B, Vasudevan V, Shlens J, Le QV (2018) Learning transferable architectures for scalable image recognition. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 8697\u20138710. https:\/\/doi.org\/10.1109\/CVPR.2018.00907","DOI":"10.1109\/CVPR.2018.00907"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-021-09985-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-021-09985-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-021-09985-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,8,27]],"date-time":"2021-08-27T19:37:59Z","timestamp":1630093079000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-021-09985-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,18]]},"references-count":72,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2021,9]]}},"alternative-id":["9985"],"URL":"https:\/\/doi.org\/10.1007\/s10664-021-09985-1","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,18]]},"assertion":[{"value":"26 May 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 June 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of interests"}}],"article-number":"84"}}