{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,30]],"date-time":"2025-05-30T05:45:53Z","timestamp":1748583953066,"version":"3.40.3"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031183140"},{"type":"electronic","value":"9783031183157"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-18315-7_18","type":"book-chapter","created":{"date-parts":[[2022,10,5]],"date-time":"2022-10-05T23:03:52Z","timestamp":1665011032000},"page":"281-297","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Can We Really Trust Explanations? Evaluating the Stability of Feature Attribution Explanation Methods via Adversarial Attack"],"prefix":"10.1007","author":[{"given":"Zhao","family":"Yang","sequence":"first","affiliation":[]},{"given":"Yuanzhe","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhongtao","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Yiming","family":"Ju","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Kang","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,6]]},"reference":[{"unstructured":"Adebayo, J., Gilmer, J., Muelly, M., Goodfellow, I., Hardt, M., Kim, B.: Sanity checks for saliency maps. arXiv preprint arXiv:1810.03292 (2018)","key":"18_CR1"},{"doi-asserted-by":"crossref","unstructured":"Alzantot, M., Sharma, Y., Elgohary, A., Ho, B.J., Srivastava, M., Chang, K.W.: Generating natural language adversarial examples. arXiv preprint arXiv:1804.07998 (2018)","key":"18_CR2","DOI":"10.18653\/v1\/D18-1316"},{"doi-asserted-by":"crossref","unstructured":"Atanasova, P., Simonsen, J.G., Lioma, C., Augenstein, I.: A diagnostic study of explainability techniques for text classification. arXiv preprint arXiv:2009.13295 (2020)","key":"18_CR3","DOI":"10.18653\/v1\/2020.emnlp-main.263"},{"doi-asserted-by":"crossref","unstructured":"Bastings, J., Aziz, W., Titov, I.: Interpretable neural predictions with differentiable binary variables. arXiv preprint arXiv:1905.08160 (2019)","key":"18_CR4","DOI":"10.18653\/v1\/P19-1284"},{"issue":"3","key":"18_CR5","doi-asserted-by":"publisher","first-page":"153","DOI":"10.2307\/408741","volume":"2","author":"L Bloomfield","year":"1926","unstructured":"Bloomfield, L.: A set of postulates for the science of language. Language 2(3), 153\u2013164 (1926)","journal-title":"Language"},{"unstructured":"Bommasani, R., et al.: On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)","key":"18_CR6"},{"doi-asserted-by":"publisher","unstructured":"Conneau, A., Kiela, D., Schwenk, H., Barrault, L., Bordes, A.: Supervised learning of universal sentence representations from natural language inference data. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pp. 670\u2013680. Association for Computational Linguistics, Copenhagen, Denmark (2017). https:\/\/doi.org\/10.18653\/v1\/D17-1070. https:\/\/aclanthology.org\/D17-1070","key":"18_CR7","DOI":"10.18653\/v1\/D17-1070"},{"unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)","key":"18_CR8"},{"doi-asserted-by":"crossref","unstructured":"DeYoung, J., et al.: ERASER: a benchmark to evaluate rationalized NLP models. arXiv preprint arXiv:1911.03429 (2019)","key":"18_CR9","DOI":"10.18653\/v1\/2020.acl-main.408"},{"doi-asserted-by":"crossref","unstructured":"Ding, S., Koehn, P.: Evaluating saliency methods for neural language models. arXiv preprint arXiv:2104.05824 (2021)","key":"18_CR10","DOI":"10.18653\/v1\/2021.naacl-main.399"},{"doi-asserted-by":"crossref","unstructured":"Ghorbani, A., Abid, A., Zou, J.: Interpretation of neural networks is fragile. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 3681\u20133688 (2019)","key":"18_CR11","DOI":"10.1609\/aaai.v33i01.33013681"},{"unstructured":"Heo, J., Joo, S., Moon, T.: Fooling neural network interpretations via adversarial model manipulation. In: Advances in Neural Information Processing Systems, pp. 2925\u20132936 (2019)","key":"18_CR12"},{"unstructured":"Herman, B.: The promise and peril of human evaluation for model interpretability. arXiv preprint arXiv:1711.07414 (2017)","key":"18_CR13"},{"doi-asserted-by":"crossref","unstructured":"Jacovi, A., Goldberg, Y.: Towards faithfully interpretable NLP systems: how should we define and evaluate faithfulness? arXiv preprint arXiv:2004.03685 (2020)","key":"18_CR14","DOI":"10.18653\/v1\/2020.acl-main.386"},{"unstructured":"Jain, S., Wallace, B.C.: Attention is not explanation. arXiv preprint arXiv:1902.10186 (2019)","key":"18_CR15"},{"doi-asserted-by":"publisher","unstructured":"Jiang, Z., Zhang, Y., Yang, Z., Zhao, J., Liu, K.: Alignment rationale for natural language inference. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 5372\u20135387. Association for Computational Linguistics, Online (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.417. https:\/\/aclanthology.org\/2021.acl-long.417","key":"18_CR16","DOI":"10.18653\/v1\/2021.acl-long.417"},{"unstructured":"Li, J., Monroe, W., Jurafsky, D.: Understanding neural networks through representation erasure. arXiv preprint arXiv:1612.08220 (2016)","key":"18_CR17"},{"issue":"3","key":"18_CR18","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1145\/3236386.3241340","volume":"16","author":"ZC Lipton","year":"2018","unstructured":"Lipton, Z.C.: The mythos of model interpretability: in machine learning, the concept of interpretability is both important and slippery. Queue 16(3), 31\u201357 (2018)","journal-title":"Queue"},{"unstructured":"Maas, A.L., Daly, R.E., Pham, P.T., Huang, D., Ng, A.Y., Potts, C.: Learning word vectors for sentiment analysis. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, pp. 142\u2013150. Association for Computational Linguistics, Portland, Oregon, USA (2011). https:\/\/aclanthology.org\/P11-1015","key":"18_CR19"},{"issue":"11","key":"18_CR20","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1145\/219717.219748","volume":"38","author":"GA Miller","year":"1995","unstructured":"Miller, G.A.: Wordnet: a lexical database for English. Commun. ACM 38(11), 39\u201341 (1995)","journal-title":"Commun. ACM"},{"key":"18_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.artint.2018.07.007","volume":"267","author":"T Miller","year":"2019","unstructured":"Miller, T.: Explanation in artificial intelligence: insights from the social sciences. Artif. Intell. 267, 1\u201338 (2019)","journal-title":"Artif. Intell."},{"unstructured":"Molnar, C.: Interpretable Machine Learning. Lulu. com (2020)","key":"18_CR22"},{"doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.D.: Glove: global vectors for word representation. In: Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP), pp. 1532\u20131543 (2014)","key":"18_CR23","DOI":"10.3115\/v1\/D14-1162"},{"unstructured":"Qi, F., Yang, C., Liu, Z., Dong, Q., Sun, M., Dong, Z.: OpenHowNet: an open sememe-based lexical knowledge base. arXiv preprint arXiv:1901.09957 (2019)","key":"18_CR24"},{"doi-asserted-by":"crossref","unstructured":"Ren, S., Deng, Y., He, K., Che, W.: Generating natural language adversarial examples through probability weighted word saliency. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 1085\u20131097 (2019)","key":"18_CR25","DOI":"10.18653\/v1\/P19-1103"},{"doi-asserted-by":"crossref","unstructured":"Ribeiro, M.T., Singh, S., Guestrin, C.: Why should i trust you? explaining the predictions of any classifier. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 1135\u20131144 (2016)","key":"18_CR26","DOI":"10.1145\/2939672.2939778"},{"key":"18_CR27","series-title":"Human\u2013Computer Interaction Series","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1007\/978-3-319-90403-0_9","volume-title":"Human and Machine Learning","author":"M Robnik-\u0160ikonja","year":"2018","unstructured":"Robnik-\u0160ikonja, M., Bohanec, M.: Perturbation-based explanations of prediction models. In: Zhou, J., Chen, F. (eds.) Human and Machine Learning. HIS, pp. 159\u2013175. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-90403-0_9"},{"issue":"5","key":"18_CR28","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1038\/s42256-019-0048-x","volume":"1","author":"C Rudin","year":"2019","unstructured":"Rudin, C.: Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead. Nat. Mach. Intell. 1(5), 206\u2013215 (2019)","journal-title":"Nat. Mach. Intell."},{"unstructured":"Samanta, S., Mehta, S.: Towards crafting text adversarial samples. arXiv preprint arXiv:1707.02812 (2017)","key":"18_CR29"},{"unstructured":"Simonyan, K., Vedaldi, A., Zisserman, A.: Deep inside convolutional networks: visualising image classification models and saliency maps. arXiv preprint arXiv:1312.6034 (2013)","key":"18_CR30"},{"doi-asserted-by":"crossref","unstructured":"Slack, D., Hilgard, S., Jia, E., Singh, S., Lakkaraju, H.: Fooling lime and SHAP: adversarial attacks on post hoc explanation methods. In: Proceedings of the AAAI\/ACM Conference on AI, Ethics, and Society, pp. 180\u2013186 (2020)","key":"18_CR31","DOI":"10.1145\/3375627.3375830"},{"unstructured":"Smilkov, D., Thorat, N., Kim, B., Vi\u00e9gas, F., Wattenberg, M.: SmoothGrad: removing noise by adding noise. arXiv preprint arXiv:1706.03825 (2017)","key":"18_CR32"},{"unstructured":"Socher, R., et al.: Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 1631\u20131642 (2013)","key":"18_CR33"},{"doi-asserted-by":"crossref","unstructured":"Spearman, C.: The proof and measurement of association between two things. (1961)","key":"18_CR34","DOI":"10.1037\/11491-005"},{"unstructured":"Sundararajan, M., Taly, A., Yan, Q.: Axiomatic attribution for deep networks. arXiv preprint arXiv:1703.01365 (2017)","key":"18_CR35"},{"doi-asserted-by":"crossref","unstructured":"Wang, J., Tuyls, J., Wallace, E., Singh, S.: Gradient-based analysis of NLP models is manipulable. arXiv preprint arXiv:2010.05419 (2020)","key":"18_CR36","DOI":"10.18653\/v1\/2020.findings-emnlp.24"},{"doi-asserted-by":"crossref","unstructured":"Wiegreffe, S., Pinter, Y.: Attention is not not explanation. arXiv preprint arXiv:1908.04626 (2019)","key":"18_CR37","DOI":"10.18653\/v1\/D19-1002"},{"doi-asserted-by":"crossref","unstructured":"Zang, Y., et al.: Word-level textual adversarial attacking as combinatorial optimization. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 6066\u20136080 (2020)","key":"18_CR38","DOI":"10.18653\/v1\/2020.acl-main.540"},{"issue":"3","key":"18_CR39","first-page":"1","volume":"11","author":"WE Zhang","year":"2020","unstructured":"Zhang, W.E., Sheng, Q.Z., Alhazmi, A., Li, C.: Adversarial attacks on deep-learning models in natural language processing: a survey. ACM Trans. Intell. Syst. Technol. (TIST) 11(3), 1\u201341 (2020)","journal-title":"ACM Trans. Intell. Syst. Technol. (TIST)"}],"container-title":["Lecture Notes in Computer Science","Chinese Computational Linguistics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-18315-7_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,5]],"date-time":"2022-10-05T23:21:16Z","timestamp":1665012076000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-18315-7_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031183140","9783031183157"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-18315-7_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"6 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CCL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China National Conference on Chinese Computational Linguistics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nanchang","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cncl2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/cips-cl.org\/static\/CCL2022\/en\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"www.softconf.com","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"293","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"8% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"293 submissions included both Chinese and English papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}