{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T04:00:41Z","timestamp":1781064041294,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,11,3]],"date-time":"2019-11-03T00:00:00Z","timestamp":1572739200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100007601","name":"Horizon 2020","doi-asserted-by":"publisher","award":["732328"],"award-info":[{"award-number":["732328"]}],"id":[{"id":"10.13039\/501100007601","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Bundesministerium f\u00fcr Bildung und Forschung","award":["01UG1735BX,01MD19003B"],"award-info":[{"award-number":["01UG1735BX,01MD19003B"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,11,3]]},"DOI":"10.1145\/3357384.3358028","type":"proceedings-article","created":{"date-parts":[[2019,11,4]],"date-time":"2019-11-04T14:11:35Z","timestamp":1572876695000},"page":"1823-1832","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":66,"title":["How Does BERT Answer Questions?"],"prefix":"10.1145","author":[{"given":"Betty","family":"van Aken","sequence":"first","affiliation":[{"name":"Beuth University of Applied Sciences, Berlin, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Benjamin","family":"Winter","sequence":"additional","affiliation":[{"name":"Beuth University of Applied Sciences, Berlin, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alexander","family":"L\u00f6ser","sequence":"additional","affiliation":[{"name":"Beuth University of Applied Sciences, Berlin, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Felix A.","family":"Gers","sequence":"additional","affiliation":[{"name":"Beuth University of Applied Sciences, Berlin, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2019,11,3]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Fuzzy Sets and Systems. ELSEVIER Fuzzy Sets and Systems","volume":"90","author":"Zadeh Lotfi A","year":"1997"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of ACL 2017 .","author":"Belinkov Yonatan"},{"key":"e_1_3_2_1_3_1","volume-title":"Signal Processing","volume":"36","author":"Comon Pierre","year":"1994"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of LREC 2018 .","author":"Conneau Alexis","year":"2018"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of NIPS 2015 .","author":"Andrew"},{"key":"e_1_3_2_1_6_1","volume-title":"Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context. CoRR","author":"Dai Zihang","year":"2019"},{"key":"e_1_3_2_1_7_1","volume-title":"Universal Transformers. In Proceedings of SMACD 2018 .","author":"Dehghani Mostafa","year":"2018"},{"key":"e_1_3_2_1_8_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR","author":"Devlin Jacob","year":"2018"},{"key":"e_1_3_2_1_9_1","volume-title":"MIPRO 2018 .","author":"Dosilovic F. K."},{"key":"e_1_3_2_1_10_1","volume-title":"On lines and planes of closest fit to systems of points in space. The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science","author":"Karl Pearson F.R.S.","year":"1901"},{"key":"e_1_3_2_1_11_1","volume-title":"Assessing BERT's Syntactic Abilities. CoRR","author":"Goldberg Yoav","year":"2019"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Riccardo Guidotti Anna Monreale Franco Turini Dino Pedreschi and Fosca Giannotti. 2018. A Survey Of Methods For Explaining Black Box Models. ACM Comput. Surv. (2018).  Riccardo Guidotti Anna Monreale Franco Turini Dino Pedreschi and Fosca Giannotti. 2018. A Survey Of Methods For Explaining Black Box Models. ACM Comput. Surv. (2018).","DOI":"10.1145\/3236009"},{"key":"e_1_3_2_1_13_1","volume-title":"Fine-tuned Language Models for Text Classification. CoRR","author":"Howard Jeremy","year":"2018"},{"key":"e_1_3_2_1_14_1","unstructured":"Huggingface. 2018. pytorch-pretrained-BERT. (2018). https:\/\/github.com\/huggingface\/pytorch-pretrained-BERT  Huggingface. 2018. pytorch-pretrained-BERT. (2018). https:\/\/github.com\/huggingface\/pytorch-pretrained-BERT"},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of IJCAI 2018 .","author":"Hupkes Dieuwke"},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of NAACL 2019 .","author":"Jain Sarthak"},{"key":"e_1_3_2_1_17_1","volume-title":"Martin","author":"Jurafsky Dan","year":"2009"},{"key":"e_1_3_2_1_18_1","volume-title":"Understanding Neural Networks through Representation Erasure. CoRR","author":"Li Jiwei","year":"2016"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.3115\/1072228.1072378"},{"key":"e_1_3_2_1_20_1","volume-title":"The Mythos of Model Interpretability. ACM Queue","author":"Lipton Zachary Chase","year":"2016"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of NAACL 2019 .","author":"Liu Nelson F."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1982.1056489"},{"key":"e_1_3_2_1_23_1","volume-title":"Caiming Xiong, and Richard Socher.","author":"McCann Bryan","year":"2018"},{"key":"e_1_3_2_1_24_1","volume-title":"Efficient Estimation of Word Representations in Vector Space. In Workshop Track Proceedings of ICLR 2013 .","author":"Mikolov Tomas","year":"2013"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-422"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"e_1_3_2_1_27_1","volume-title":"Understanding the Behaviors of BERT in Ranking. CoRR","author":"Qiao Yifan","year":"2019"},{"key":"e_1_3_2_1_28_1","volume-title":"Improving Language Understanding by Generative Pre-Training. OpenAI Blog","author":"Radford Alec","year":"2018"},{"key":"e_1_3_2_1_29_1","volume-title":"Language Models are Unsupervised Multitask Learners. OpenAI Blog","author":"Radford Alec","year":"2019"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-2124"},{"key":"e_1_3_2_1_31_1","unstructured":"Pranav Rajpurkar Jian Zhang Konstantin Lopyrev and Percy Liang. 2016. SQuAD: 100 000  Pranav Rajpurkar Jian Zhang Konstantin Lopyrev and Percy Liang. 2016. SQuAD: 100 000"},{"key":"e_1_3_2_1_32_1","volume-title":"Machine Comprehension of Text. In Proceedings of EMNLP 2016 .","author":"Questions"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of ICLR 2017 .","author":"Seo Min Joon"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1159"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of ICLR 2019 .","author":"Tenney Ian","year":"2019"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of AISTATS 2009 .","author":"van der Maaten Laurens","year":"2009"},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of NIPS 2017 .","author":"Vaswani Ashish","year":"2017"},{"key":"e_1_3_2_1_38_1","volume-title":"Overview of TREC 2001. In Proceedings of TREC 2001 .","author":"Voorhees Ellen","year":"2001"},{"key":"e_1_3_2_1_39_1","volume-title":"OntoNotes: A Large Training Corpus for Enhanced Processing","author":"Weischedel Ralph"},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of ICLR 2016 .","author":"Weston Jason","year":"2016"},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of EMNLP 2018 .","author":"Yang Zhilin"},{"key":"e_1_3_2_1_42_1","volume-title":"Visual interpretability for deep learning: a survey. Frontiers of IT & EE","author":"Zhu Zhang","year":"2018"}],"event":{"name":"CIKM '19: The 28th ACM International Conference on Information and Knowledge Management","location":"Beijing China","acronym":"CIKM '19","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 28th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3357384.3358028","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3357384.3358028","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:44:18Z","timestamp":1750203858000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3357384.3358028"}},"subtitle":["A Layer-Wise Analysis of Transformer Representations"],"short-title":[],"issued":{"date-parts":[[2019,11,3]]},"references-count":42,"alternative-id":["10.1145\/3357384.3358028","10.1145\/3357384"],"URL":"https:\/\/doi.org\/10.1145\/3357384.3358028","relation":{},"subject":[],"published":{"date-parts":[[2019,11,3]]},"assertion":[{"value":"2019-11-03","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}