{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T07:16:10Z","timestamp":1742973370558,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031824807"},{"type":"electronic","value":"9783031824814"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-82481-4_10","type":"book-chapter","created":{"date-parts":[[2025,3,4]],"date-time":"2025-03-04T10:04:16Z","timestamp":1741082656000},"page":"133-147","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Robust Infidelity: When Faithfulness Measures on\u00a0Masked Language Models Are Misleading"],"prefix":"10.1007","author":[{"given":"Evan","family":"Crothers","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Herna","family":"Viktor","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nathalie","family":"Japkowicz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,3,4]]},"reference":[{"key":"10_CR1","doi-asserted-by":"publisher","unstructured":"Alzantot, M., et al.: Generating natural language adversarial examples. In: EMNLP 2018, pp. 2890\u20132896. ACL, Brussels, Belgium (2018). https:\/\/doi.org\/10.18653\/v1\/D18-1316","DOI":"10.18653\/v1\/D18-1316"},{"key":"10_CR2","doi-asserted-by":"publisher","unstructured":"Arras, L., et al.: Explaining predictions of non-linear classifiers in NLP. In: Proceedings of the 1st Workshop on Representation Learning for NLP, pp.\u00a01\u20137. ACL, Berlin, Germany (2016). https:\/\/doi.org\/10.18653\/v1\/W16-1601","DOI":"10.18653\/v1\/W16-1601"},{"key":"10_CR3","unstructured":"Asthana, K., et al.: TCAB: a large-scale text classification attack benchmark. arXiv preprint arXiv:2210.12233 (2022)"},{"key":"10_CR4","doi-asserted-by":"publisher","unstructured":"Atanasova, P., et al.: A diagnostic study of explainability techniques for text classification. In: EMNLP 2020, pp. 3256\u20133274. ACL (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.263","DOI":"10.18653\/v1\/2020.emnlp-main.263"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Crothers, E., et al.: Adversarial robustness of neural-statistical features in detection of generative transformers. In: 2022 International Joint Conference on Neural Networks (IJCNN), pp.\u00a01\u20138. IEEE (2022)","DOI":"10.1109\/IJCNN55064.2022.9892269"},{"key":"10_CR6","doi-asserted-by":"publisher","unstructured":"Devlin, J., et al.: BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL-HLT 2019, pp. 4171\u20134186. ACL, Minneapolis, Minnesota (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"10_CR7","doi-asserted-by":"publisher","unstructured":"DeYoung, J., et al.: ERASER: a benchmark to evaluate rationalized NLP models. In: ACL 58, pp. 4443\u20134458. ACL (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.408","DOI":"10.18653\/v1\/2020.acl-main.408"},{"key":"10_CR8","doi-asserted-by":"crossref","unstructured":"Dixon, L., et al.: Measuring and mitigating unintended bias in text classification. In: Proceedings of the 2018 AAAI\/ACM Conference on AI, Ethics, and Society, pp. 67\u201373 (2018)","DOI":"10.1145\/3278721.3278729"},{"key":"10_CR9","doi-asserted-by":"publisher","unstructured":"Ebrahimi, J., et al.: HotFlip: white-box adversarial examples for text classification. In: ACL 2018 (Volume 2: Short Papers), pp. 31\u201336. ACL, Melbourne, Australia (2018). https:\/\/doi.org\/10.18653\/v1\/P18-2006","DOI":"10.18653\/v1\/P18-2006"},{"key":"10_CR10","doi-asserted-by":"publisher","unstructured":"Feng, S., et al.: Pathologies of neural models make interpretations difficult. In: EMNLP 2018, pp. 3719\u20133728. ACL, Brussels, Belgium (2018). https:\/\/doi.org\/10.18653\/v1\/D18-1407","DOI":"10.18653\/v1\/D18-1407"},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Gao, J., et al.: Black-box generation of adversarial text sequences to evade deep learning classifiers. In: 2018 IEEE Security and Privacy Workshops (SPW), pp. 50\u201356. IEEE (2018)","DOI":"10.1109\/SPW.2018.00016"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Huber, L., et al.: Detecting word-level adversarial text attacks via shapley additive explanations. In: Proceedings of the 7th Workshop on Representation Learning for NLP, pp. 156\u2013166 (2022)","DOI":"10.18653\/v1\/2022.repl4nlp-1.16"},{"key":"10_CR13","unstructured":"Ivankay, A., et al.: Fooling explanations in text classifiers. arXiv preprint arXiv:2206.03178 (2022)"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Jacovi, A., et al.: Towards faithfully interpretable NLP systems: how should we define and evaluate faithfulness? arXiv preprint arXiv:2004.03685 (2020)","DOI":"10.18653\/v1\/2020.acl-main.386"},{"key":"10_CR15","unstructured":"Jigsaw: Jigsaw unintended bias in toxicity classification (2019). https:\/\/www.kaggle.com\/c\/jigsaw-unintended-bias-in-toxicity-classification"},{"key":"10_CR16","doi-asserted-by":"crossref","unstructured":"Jin, D., et al.: Is BERT really robust? a strong baseline for natural language attack on text classification and entailment. In: AAAI, vol.\u00a034, pp. 8018\u20138025 (2020)","DOI":"10.1609\/aaai.v34i05.6311"},{"issue":"3","key":"10_CR17","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1145\/3236386.3241340","volume":"16","author":"ZC Lipton","year":"2018","unstructured":"Lipton, Z.C.: The mythos of model interpretability: in machine learning, the concept of interpretability is both important and slippery. Queue 16(3), 31\u201357 (2018). https:\/\/doi.org\/10.1145\/3236386.3241340","journal-title":"Queue"},{"key":"10_CR18","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"10_CR19","unstructured":"Lundberg, S.M., et al.: A unified approach to interpreting model predictions. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 4768\u20134777. NIPS\u201917, Curran Associates Inc., Red Hook, NY, USA (2017)"},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Madsen, A., et al.: Evaluating the faithfulness of importance measures in NLP by recursively masking allegedly important tokens and retraining. In: EMNLP 2022, pp. 1731\u20131751 (2022)","DOI":"10.18653\/v1\/2022.findings-emnlp.125"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"McInnes, L., et al.: UMAP: uniform manifold approximation and projection for dimension reduction. arXiv preprint arXiv:1802.03426 (2018)","DOI":"10.21105\/joss.00861"},{"key":"10_CR22","doi-asserted-by":"crossref","unstructured":"Nguyen, D.: Comparing automatic and human evaluation of local explanations for text classification. In: NAACL-HLT 2018, pp. 1069\u20131078 (2018)","DOI":"10.18653\/v1\/N18-1097"},{"key":"10_CR23","unstructured":"Qian, E.: Twitter climate change sentiment dataset (2019). https:\/\/www.kaggle.com\/datasets\/edqian\/twitter-climate-change-sentiment-dataset"},{"key":"10_CR24","doi-asserted-by":"crossref","unstructured":"Reimers, N., et al.: Sentence-BERT: sentence embeddings using Siamese BERT-networks. In: EMNLP-IJCNLP 2019, pp. 3982\u20133992 (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Ribeiro, M.T., et al.: Why should I trust you? Explaining the predictions of any classifier. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 1135\u20131144 (2016)","DOI":"10.1145\/2939672.2939778"},{"key":"10_CR26","doi-asserted-by":"crossref","unstructured":"Sadria, M., et al.: Adversarial training improves model interpretability in single-cell RNA-Seq analysis. Bioinf. Adv. 3(1), vbad166 (2023)","DOI":"10.1093\/bioadv\/vbad166"},{"key":"10_CR27","doi-asserted-by":"publisher","unstructured":"Samek, W., et al.: Evaluating the visualization of what a deep neural network has learned. IEEE Trans. Neural Netw. Learn. Syst. 28, 2660\u20132673 (2017). https:\/\/doi.org\/10.1109\/TNNLS.2016.2599820","DOI":"10.1109\/TNNLS.2016.2599820"},{"key":"10_CR28","doi-asserted-by":"crossref","unstructured":"Socher, R., et al.: Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 1631\u20131642 (2013)","DOI":"10.18653\/v1\/D13-1170"},{"key":"10_CR29","unstructured":"Sundararajan, M., et al.: Axiomatic attribution for deep networks. In: International Conference on Machine Learning, pp. 3319\u20133328. PMLR (2017)"},{"key":"10_CR30","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS, pp. 5998\u20136008 (2017)"},{"key":"10_CR31","unstructured":"Xiao, H.: BERT as service: GitHub repository (2018). https:\/\/github.com\/hanxiao\/bert-as-service"},{"key":"10_CR32","doi-asserted-by":"crossref","unstructured":"Yoo, J.Y., et al.: Towards improving adversarial training of NLP models. In: Findings of the ACL: EMNLP 2021, pp. 945\u2013956 (2021)","DOI":"10.18653\/v1\/2021.findings-emnlp.81"},{"key":"10_CR33","doi-asserted-by":"publisher","unstructured":"Zafar, M.B., et al.: On the lack of robust interpretability of neural text classifiers. In: ACL-IJCNLP 2021, pp. 3730\u20133740. ACL (2021). https:\/\/doi.org\/10.18653\/v1\/2021.findings-acl.327","DOI":"10.18653\/v1\/2021.findings-acl.327"}],"container-title":["Lecture Notes in Computer Science","Machine Learning, Optimization, and Data Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-82481-4_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,4]],"date-time":"2025-03-04T10:04:26Z","timestamp":1741082666000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-82481-4_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031824807","9783031824814"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-82481-4_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"4 March 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}