{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T03:59:11Z","timestamp":1769745551211,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":68,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T00:00:00Z","timestamp":1665360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,10]]},"DOI":"10.1145\/3551349.3561157","type":"proceedings-article","created":{"date-parts":[[2023,1,5]],"date-time":"2023-01-05T20:43:54Z","timestamp":1672951434000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":13,"title":["Unveiling Hidden DNN Defects with Decision-Based Metamorphic Testing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3053-8923","authenticated-orcid":false,"given":"Yuanyuan","family":"Yuan","sequence":"first","affiliation":[{"name":"The Hong Kong University of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7747-0332","authenticated-orcid":false,"given":"Qi","family":"Pang","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0866-0308","authenticated-orcid":false,"given":"Shuai","family":"Wang","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, China"}]}],"member":"320","published-online":{"date-parts":[[2023,1,5]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. Research Artifact. https:\/\/github.com\/Yuanyuan-Yuan\/Decision-Oracle."},{"key":"e_1_3_2_1_2_1","unstructured":"Marco Ancona Cengiz Oztireli and Markus Gross. 2019. Explaining deep neural networks with a polynomial time for shapley value approximation(PMLR)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0130140"},{"key":"e_1_3_2_1_4_1","volume-title":"Towards evaluating the robustness of neural networks","author":"Carlini Nicholas","unstructured":"Nicholas Carlini and David Wagner. 2017. Towards evaluating the robustness of neural networks(IEEE SP)."},{"key":"e_1_3_2_1_5_1","first-page":"2","article-title":"Exploring neural networks with activation atlases","volume":"1","author":"Carter Shan","year":"2019","unstructured":"Shan Carter, Zan Armstrong, Ludwig Schubert, Ian Johnson, and Chris Olah. 2019. Exploring neural networks with activation atlases. Distill 1(2019), 2.","journal-title":"Distill"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Aditya Chattopadhay Anirban Sarkar Prantik Howlader and Vineeth\u00a0N Balasubramanian. 2018. Grad-cam++: Generalized gradient-based visual explanations for deep convolutional networks(WACV).","DOI":"10.1109\/WACV.2018.00097"},{"key":"e_1_3_2_1_7_1","unstructured":"Tsong\u00a0Y Chen Shing\u00a0C Cheung and Shiu\u00a0Ming Yiu. 1998. Metamorphic testing: a new approach for generating next test cases. Technical Report. Technical Report HKUST-CS98-01 Department of Computer Science Hong Kong\u00a0\u2026."},{"key":"e_1_3_2_1_8_1","unstructured":"Ian Covert and Su-In Lee. 2021. Improving KernelSHAP: Practical Shapley value estimation using linear regression(ICAIS)."},{"key":"e_1_3_2_1_9_1","volume-title":"Algorithmic transparency via quantitative input influence: Theory and experiments with learning systems","author":"Datta Anupam","unstructured":"Anupam Datta, Shayak Sen, and Yair Zick. 2016. Algorithmic transparency via quantitative input influence: Theory and experiments with learning systems. In IEEE SP."},{"key":"e_1_3_2_1_10_1","unstructured":"Samet Demir Hasan\u00a0Ferit Eniser and Alper Sen. 2019. DeepSmartFuzzer: Reward Guided Test Generation For Deep Learning. arXiv preprint arXiv:1911.10621(2019)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"J. Deng W. Dong R. Socher L.-J. Li K. Li and L. Fei-Fei. 2009. ImageNet: A Large-Scale Hierarchical Image Database. In CVPR09.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2211477"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Anurag Dwarakanath Manish Ahuja Sanjay Podder Silja Vinu Arijit Naskar and MV Koushik. 2019. Metamorphic testing of a deep learning based forecaster. In MET.","DOI":"10.1109\/MET.2019.00014"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Anurag Dwarakanath Manish Ahuja Samarth Sikand Raghotham\u00a0M. Rao R.\u00a0P. Jagadeesh\u00a0Chandra Bose Neville Dubash and Sanjay Podder. 2018. Identifying Implementation Bugs in Machine Learning Based Image Classifiers Using Metamorphic Testing. In ISSTA.","DOI":"10.1145\/3213846.3213858"},{"key":"e_1_3_2_1_15_1","volume-title":"ImageNet-trained CNNs are biased towards texture","author":"Geirhos Robert","year":"1811","unstructured":"Robert Geirhos, Patricia Rubisch, Claudio Michaelis, Matthias Bethge, Felix\u00a0A Wichmann, and Wieland Brendel. 2018. ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness. arXiv preprint arXiv:1811.12231(2018)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"e_1_3_2_1_17_1","unstructured":"Ian\u00a0J Goodfellow Jonathon Shlens and Christian Szegedy. 2015. Explaining and harnessing adversarial examples. In ICLR."},{"key":"e_1_3_2_1_18_1","volume-title":"XAI\u2014Explainable artificial intelligence. Science Robotics 4, 37","author":"Gunning David","year":"2019","unstructured":"David Gunning, Mark Stefik, Jaesik Choi, Timothy Miller, Simone Stumpf, and Guang-Zhong Yang. 2019. XAI\u2014Explainable artificial intelligence. Science Robotics 4, 37 (2019), eaay7120."},{"key":"e_1_3_2_1_19_1","unstructured":"Kilem\u00a0L Gwet. 2014. Handbook of inter-rater reliability: The definitive guide to measuring the extent of agreement among raters. Advanced Analytics LLC."},{"key":"e_1_3_2_1_20_1","unstructured":"Kaiming He Georgia Gkioxari Piotr Doll\u00e1r and Ross Girshick. 2017. Mask R-CNN. In CVPR. 2961\u20132969."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Matthias Hein Maksym Andriushchenko and Julian Bitterwolf. 2019. Why relu networks yield high-confidence predictions far away from the training data and how to mitigate the problem. In CVPR.","DOI":"10.1109\/CVPR.2019.00013"},{"key":"e_1_3_2_1_23_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861(2017).","author":"Howard G","year":"2017","unstructured":"Andrew\u00a0G Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. 2017. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861(2017)."},{"key":"e_1_3_2_1_24_1","unstructured":"Boyue\u00a0Caroline Hu Lina Marsso Krzysztof Czarnecki Rick Salay Huakun Shen and Marsha Chechik. 2022. If a Human Can See It So Should Your System: Reliability Requirements for Machine Vision Components. arXiv preprint arXiv:2202.03930(2022)."},{"key":"e_1_3_2_1_25_1","volume-title":"Laurens Van Der\u00a0Maaten, and Kilian\u00a0Q Weinberger","author":"Huang Gao","year":"2017","unstructured":"Gao Huang, Zhuang Liu, Laurens Van Der\u00a0Maaten, and Kilian\u00a0Q Weinberger. 2017. Densely connected convolutional networks. In CVPR."},{"key":"e_1_3_2_1_26_1","volume-title":"imgaug. https:\/\/github.com\/aleju\/imgaug. Online","author":"Jung B.","year":"2020","unstructured":"Alexander\u00a0B. Jung, Kentaro Wada, Jon Crall, Satoshi Tanaka, Jake Graving, Christoph Reinders, Sarthak Yadav, Joy Banerjee, G\u00e1bor Vecsei, Adam Kraft, Zheng Rui, Jirka Borovec, Christian Vallentin, Semen Zhydenko, Kilian Pfeiffer, Ben Cook, Ismael Fern\u00e1ndez, Fran\u00e7ois-Michel De\u00a0Rainville, Chi-Hung Weng, Abner Ayala-Acevedo, Raphael Meudec, Matias Laporte, 2020. imgaug. https:\/\/github.com\/aleju\/imgaug. Online; accessed 01-Feb-2020."},{"key":"e_1_3_2_1_27_1","unstructured":"Alex Krizhevsky Geoffrey Hinton 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_2_1_28_1","volume-title":"A visual concept shapes image perception.Radiology 146, 2","author":"Kundel HL","year":"1983","unstructured":"HL Kundel and CF Nodine. 1983. A visual concept shapes image perception.Radiology 146, 2 (1983), 363\u2013368."},{"key":"e_1_3_2_1_29_1","unstructured":"Alexey Kurakin Ian Goodfellow and Samy Bengio. 2016. Adversarial examples in the physical world. arXiv preprint arXiv:1607.02533(2016)."},{"key":"e_1_3_2_1_30_1","volume-title":"Handwritten digit recognition with a back-propagation network. NIPS","author":"LeCun Yann","year":"1989","unstructured":"Yann LeCun, Bernhard Boser, John Denker, Donnie Henderson, Richard Howard, Wayne Hubbard, and Lawrence Jackel. 1989. Handwritten digit recognition with a back-propagation network. NIPS (1989)."},{"key":"e_1_3_2_1_31_1","volume-title":"Backpropagation applied to handwritten zip code recognition. Neural computation 1, 4","author":"LeCun Yann","year":"1989","unstructured":"Yann LeCun, Bernhard Boser, John\u00a0S Denker, Donnie Henderson, Richard\u00a0E Howard, Wayne Hubbard, and Lawrence\u00a0D Jackel. 1989. Backpropagation applied to handwritten zip code recognition. Neural computation 1, 4 (1989), 541\u2013551."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_33_1","volume-title":"CCTEST: Testing and Repairing Code Completion Systems. arXiv preprint arXiv:2208.08289(2022).","author":"Li Zongjie","year":"2022","unstructured":"Zongjie Li, Chaozheng Wang, Zhibo Liu, Haoxuan Wang, Shuai Wang, and Cuiyun Gao. 2022. CCTEST: Testing and Repairing Code Completion Systems. arXiv preprint arXiv:2208.08289(2022)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.3390\/e23010018"},{"key":"e_1_3_2_1_35_1","volume-title":"A unified approach to interpreting model predictions. Advances in neural information processing systems 30","author":"Lundberg M","year":"2017","unstructured":"Scott\u00a0M Lundberg and Su-In Lee. 2017. A unified approach to interpreting model predictions. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_36_1","volume-title":"Deepmutation: Mutation testing of deep learning systems. In ISSRE.","author":"Ma Lei","year":"2018","unstructured":"Lei Ma, Fuyuan Zhang, Jiyuan Sun, Minhui Xue, Bo Li, Felix Juefei-Xu, Chao Xie, Li Li, Yang Liu, Jianjun Zhao, 2018. Deepmutation: Mutation testing of deep learning systems. In ISSRE."},{"key":"e_1_3_2_1_37_1","unstructured":"Pingchuan Ma and Shuai Wang. 2021. MT-teql: evaluating and augmenting neural NLIDB on real-world linguistic and schema variations. (2021)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Pingchuan Ma Shuai Wang and Jin Liu. 2020. Metamorphic Testing and Certified Mitigation of Fairness Violations in NLP Models. In IJCAI. 458\u2013465.","DOI":"10.24963\/ijcai.2020\/64"},{"key":"e_1_3_2_1_39_1","unstructured":"Aleksander Madry Aleksandar Makelov Ludwig Schmidt Dimitris Tsipras and Adrian Vladu. 2017. Towards deep learning models resistant to adversarial attacks. arXiv preprint arXiv:1706.06083(2017)."},{"key":"e_1_3_2_1_40_1","unstructured":"Junhua Mao Xu Wei Yi Yang Jiang Wang Zhiheng Huang and Alan\u00a0L Yuille. 2015. Learning like a child: Fast novel visual concept learning from sentence descriptions of images. In ICCV."},{"key":"e_1_3_2_1_41_1","volume-title":"Layer-wise relevance propagation: an overview. Explainable AI: interpreting, explaining and visualizing deep learning","author":"Montavon Gr\u00e9goire","year":"2019","unstructured":"Gr\u00e9goire Montavon, Alexander Binder, Sebastian Lapuschkin, Wojciech Samek, and Klaus-Robert M\u00fcller. 2019. Layer-wise relevance propagation: an overview. Explainable AI: interpreting, explaining and visualizing deep learning (2019)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Shin Nakajima and Tsong\u00a0Yueh Chen. 2019. Generating biased dataset for metamorphic testing of machine learning programs. In IFIP-ICTSS.","DOI":"10.1007\/978-3-030-31280-0_4"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298640"},{"key":"e_1_3_2_1_44_1","volume-title":"Tensorfuzz: Debugging neural networks with coverage-guided fuzzing. arXiv preprint arXiv:1807.10875(2018).","author":"Odena Augustus","year":"2018","unstructured":"Augustus Odena and Ian Goodfellow. 2018. Tensorfuzz: Debugging neural networks with coverage-guided fuzzing. arXiv preprint arXiv:1807.10875(2018)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1979.4310076"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Kexin Pei Yinzhi Cao Junfeng Yang and Suman Jana. 2017. DeepXplore: Automated Whitebox Testing of Deep Learning Systems(SOSP \u201917).","DOI":"10.1145\/3132747.3132785"},{"key":"e_1_3_2_1_47_1","unstructured":"Shaoqing Ren Kaiming He Ross Girshick and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. In Advances in neural information processing systems. 91\u201399."},{"key":"e_1_3_2_1_48_1","volume-title":"Why should i trust you?","author":"Ribeiro Marco\u00a0Tulio","unstructured":"Marco\u00a0Tulio Ribeiro, Sameer Singh, and Carlos Guestrin. 2016. \u201d Why should i trust you?\u201d Explaining the predictions of any classifier. In KDD."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2016.2532875"},{"key":"e_1_3_2_1_50_1","volume-title":"Grad-cam: Visual explanations from deep networks via gradient-based localization. In ICCV.","author":"Selvaraju R","year":"2017","unstructured":"Ramprasaath\u00a0R Selvaraju, Michael Cogswell, Abhishek Das, Ramakrishna Vedantam, Devi Parikh, and Dhruv Batra. 2017. Grad-cam: Visual explanations from deep networks via gradient-based localization. In ICCV."},{"key":"e_1_3_2_1_51_1","volume-title":"A value for n-person games","author":"Shapley S","unstructured":"Lloyd\u00a0S Shapley. 2016. A value for n-person games. Princeton University Press."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-019-0197-0"},{"key":"e_1_3_2_1_53_1","unstructured":"Avanti Shrikumar Peyton Greenside and Anshul Kundaje. 2017. Learning important features through propagating activation differences. In ICML."},{"key":"e_1_3_2_1_54_1","volume-title":"The kappa statistic in reliability studies: use, interpretation, and sample size requirements. Physical therapy","author":"Sim Julius","year":"2005","unstructured":"Julius Sim and Chris\u00a0C Wright. 2005. The kappa statistic in reliability studies: use, interpretation, and sample size requirements. Physical therapy (2005)."},{"key":"e_1_3_2_1_55_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556(2014)."},{"key":"e_1_3_2_1_56_1","unstructured":"Mukund Sundararajan Ankur Taly and Qiqi Yan. 2017. Axiomatic attribution for deep networks. In ICML."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Christian Szegedy Vincent Vanhoucke Sergey Ioffe Jon Shlens and Zbigniew Wojna. 2016. Rethinking the inception architecture for computer vision. In CVPR.","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_58_1","volume-title":"To what extent do DNN-based image classification models make unreliable inferences?Empirical Software Engineering 26, 5","author":"Tian Yongqiang","year":"2021","unstructured":"Yongqiang Tian, Shiqing Ma, Ming Wen, Yepang Liu, Shing-Chi Cheung, and Xiangyu Zhang. 2021. To what extent do DNN-based image classification models make unreliable inferences?Empirical Software Engineering 26, 5 (2021), 1\u201340."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"crossref","unstructured":"Yuchi Tian Kexin Pei Suman Jana and Baishakhi Ray. 2018. DeepTest: Automated Testing of Deep-neural-network-driven Autonomous Cars(ICSE \u201918).","DOI":"10.1145\/3180155.3180220"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/1386352.1386376"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377811.3380379"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"crossref","unstructured":"Shuai Wang and Zhendong Su. 2020. Metamorphic Object Insertion for Testing Object Detection Systems. In ASE.","DOI":"10.1145\/3324884.3416584"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"crossref","unstructured":"Dongwei Xiao Zhibo Liu Yuanyuan Yuan Qi Pang and Shuai Wang. 2022. Metamorphic Testing of Deep Learning Compilers. (2022).","DOI":"10.1145\/3489048.3522655"},{"key":"e_1_3_2_1_64_1","unstructured":"Xiaofei Xie Lei Ma Felix Juefei-Xu Hongxu Chen Minhui Xue Bo Li Yang Liu Jianjun Zhao Jianxiong Yin and Simon See. 2018. Coverage-guided fuzzing for deep neural networks. arXiv preprint arXiv:1809.01266(2018)."},{"key":"e_1_3_2_1_65_1","unstructured":"Yuanyuan Yuan Qi Pang and Shuai Wang. 2021. Enhancing Deep Neural Networks Testing by Traversing Data Manifold. arXiv preprint arXiv:2112.01956(2021)."},{"key":"e_1_3_2_1_66_1","volume-title":"Perception Matters: Detecting Perception Failures of VQA Models Using Metamorphic Testing. In CVPR.","author":"Yuan Yuanyuan","year":"2021","unstructured":"Yuanyuan Yuan, Shuai Wang, Mingyue Jiang, and Tsong\u00a0Yueh Chen. 2021. Perception Matters: Detecting Perception Failures of VQA Models Using Metamorphic Testing. In CVPR."},{"key":"e_1_3_2_1_67_1","volume-title":"Machine learning testing: Survey, landscapes and horizons. TSE","author":"Zhang M","year":"2020","unstructured":"Jie\u00a0M Zhang, Mark Harman, Lei Ma, and Yang Liu. 2020. Machine learning testing: Survey, landscapes and horizons. TSE (2020)."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"crossref","unstructured":"Mengshi Zhang Yuqun Zhang Lingming Zhang Cong Liu and Sarfraz Khurshid. 2018. DeepRoad: GAN-based Metamorphic Testing and Input Validation Framework for Autonomous Driving Systems. In ASE.","DOI":"10.1145\/3238147.3238187"}],"event":{"name":"ASE '22: 37th IEEE\/ACM International Conference on Automated Software Engineering","location":"Rochester MI USA","acronym":"ASE '22"},"container-title":["Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3551349.3561157","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3551349.3561157","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T08:26:50Z","timestamp":1755851210000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3551349.3561157"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,10]]},"references-count":68,"alternative-id":["10.1145\/3551349.3561157","10.1145\/3551349"],"URL":"https:\/\/doi.org\/10.1145\/3551349.3561157","relation":{},"subject":[],"published":{"date-parts":[[2022,10,10]]},"assertion":[{"value":"2023-01-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}