{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T10:09:24Z","timestamp":1759226964578,"version":"3.28.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9534299","type":"proceedings-article","created":{"date-parts":[[2021,9,21]],"date-time":"2021-09-21T20:40:52Z","timestamp":1632256852000},"page":"1-8","source":"Crossref","is-referenced-by-count":10,"title":["How Can the [MASK] Know? The Sources and Limitations of Knowledge in BERT"],"prefix":"10.1109","author":[{"given":"Maksim","family":"Podkorytov","sequence":"first","affiliation":[]},{"given":"Daniel","family":"Bis","sequence":"additional","affiliation":[]},{"given":"Xiuwen","family":"Liu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"Electra Pretraining text encoders as discriminators rather than generators","year":"2020","author":"clark","key":"ref30"},{"journal-title":"Layer normalization","year":"2016","author":"ba","key":"ref10"},{"journal-title":"KG-BERT BERT for Knowledge Graph Completion","year":"2019","author":"yao","key":"ref11"},{"journal-title":"XLNet Generalized Autoregressive Pretraining for Language Understanding","year":"2019","author":"yang","key":"ref12"},{"journal-title":"A primer in bertology What we know about how bert works","year":"2020","author":"rogers","key":"ref13"},{"journal-title":"Transformer-xl Attentive language models beyond a fixed-length context","year":"2019","author":"dai","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"journal-title":"olmpics-on what language model pre-training captures","year":"2019","author":"talmor","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1470"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1005"},{"journal-title":"Realm Retrieval-augmented language model pre-training","year":"2020","author":"guu","key":"ref19"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1097\/01.TA.0000162148.03280.02"},{"journal-title":"RoBERTa A Robustly optimized BERT Pretraining Approach","year":"2019","author":"liu","key":"ref4"},{"journal-title":"On identifiability in transformers","year":"2019","author":"brunner","key":"ref27"},{"key":"ref3","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"NAACL-HLT"},{"journal-title":"Assessing bert&#x2019;s syntactic abilities","year":"2019","author":"goldberg","key":"ref6"},{"journal-title":"Albert A lite bert for self-supervised learning of language representations","year":"2019","author":"lan","key":"ref29"},{"journal-title":"What does 
bert look at? an analysis of bert's attention","year":"2019","author":"clark","key":"ref5"},{"journal-title":"How can we know what language models know?","year":"2019","author":"jiang","key":"ref8"},{"journal-title":"Language models as knowledge bases?","year":"2019","author":"petroni","key":"ref7"},{"key":"ref2","article-title":"Improving language understanding by generative pretraining","author":"radford","year":"2018","journal-title":"Preprint"},{"journal-title":"How context affects language models' factual predictions","year":"2020","author":"petroni","key":"ref9"},{"key":"ref1","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"NIPS"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412159"},{"journal-title":"Gaussian error linear units (gelus)","year":"2016","author":"hendrycks","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-15719-7_10"},{"journal-title":"HuggingFace's Transformers State-of-the-art Natural Language Processing","year":"2019","author":"wolf","key":"ref24"},{"journal-title":"Google's neural machine translation system Bridging the gap between human and machine translation","year":"2016","author":"wu","key":"ref23"},{"key":"ref26","article-title":"Binary codes capable of correcting deletions, insertions, and reversals","volume":"10","author":"levenshtein","year":"1966","journal-title":"Soviet Physics Doklady"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.2307\/1412159"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09534299.pdf?arnumber=9534299","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:46:13Z","timestamp":1652197573000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9534299\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9534299","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}
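
For context: the record above is the JSON that Crossref's public REST API returns for this paper's DOI (endpoint format https://api.crossref.org/works/{DOI}). Below is a minimal Python sketch, assuming the `requests` library and a placeholder contact address, of fetching and reading such a record; the field names follow the record above.

import requests

# DOI taken from the record above.
DOI = "10.1109/ijcnn52387.2021.9534299"
url = f"https://api.crossref.org/works/{DOI}"

# The "mailto" parameter routes the request to Crossref's polite pool;
# the address is a placeholder, not a real contact.
resp = requests.get(url, params={"mailto": "you@example.com"}, timeout=30)
resp.raise_for_status()

# The work payload sits under "message", as in the record above.
work = resp.json()["message"]

print(work["title"][0])  # "How Can the [MASK] Know? ..."
print(", ".join(f"{a['given']} {a['family']}" for a in work["author"]))
print("DOI:", work["DOI"])
print("Deposited references:", work["references-count"])  # 30 in this record

Note that counters such as "is-referenced-by-count" are live values and will differ from the snapshot above when re-fetched.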