{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T07:31:19Z","timestamp":1725607879031},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/ijcnn48605.2020.9206645","type":"proceedings-article","created":{"date-parts":[[2020,9,30]],"date-time":"2020-09-30T00:40:33Z","timestamp":1601426433000},"page":"1-8","source":"Crossref","is-referenced-by-count":2,"title":["Effects of Architecture and Training on Embedding Geometry and Feature Discriminability in BERT"],"prefix":"10.1109","author":[{"given":"Maksim","family":"Podkorytov","sequence":"first","affiliation":[]},{"given":"Daniel","family":"Bis","sequence":"additional","affiliation":[]},{"given":"Jinglun","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Kobra","family":"Amirizirtol","sequence":"additional","affiliation":[]},{"given":"Xiuwen","family":"Liu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"year":"2019","author":"alsentzer","article-title":"Publicly available clinical bert embeddings","key":"ref33"},{"year":"2015","author":"schakel","article-title":"Measuring word significance using distributed representations of words","key":"ref32"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.18653\/v1\/D19-1448"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.18653\/v1\/W19-4828"},{"doi-asserted-by":"publisher","key":"ref34","DOI":"10.18653\/v1\/P19-1452"},{"year":"2006","author":"mahoney","journal-title":"text8 dataset","key":"ref10"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.18653\/v1\/D19-1006"},{"key":"ref12","article-title":"Optimal linear representations of images for object recognition","author":"liu","year":"2003","journal-title":"CVPR"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1016\/0377-0427(87)90125-7"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/ICPR.2010.764"},{"year":"2014","author":"kingma","article-title":"Adam: A method for stochastic optimization","key":"ref15"},{"year":"2017","author":"loshchilov","article-title":"Decoupled weight decay regularization","key":"ref16"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1080\/14786440109462720"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2010-343","article-title":"Recurrent neural network based language model","author":"mikolov","year":"2010","journal-title":"InterSpeech"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.3115\/v1\/D14-1162"},{"year":"2019","author":"du","article-title":"Using bert for word sense disambiguation","key":"ref28"},{"year":"2019","author":"liu","article-title":"Roberta: A robustly optimized bert pretraining approach","key":"ref4"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.18653\/v1\/P19-1459"},{"key":"ref3","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"NAACL-HLT"},{"key":"ref6","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"NIPS"},{"year":"0","author":"huang","article-title":"Clinicalbert: Modeling clinical notes and predicting hospital readmission","key":"ref29"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref8","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"2013","journal-title":"NIPS"},{"year":"2016","author":"wu","article-title":"Google&#x2019;s neural machine translation system: Bridging the gap between human and machine translation","key":"ref7"},{"key":"ref2","article-title":"Efficient estimation of word representations in vector space","author":"mikolov","year":"2013","journal-title":"ICLRE"},{"year":"2019","author":"wolf","article-title":"Huggingface&#x2019;s transformers: State-of-the-art natural language processing","key":"ref9"},{"key":"ref1","article-title":"A neural probabilistic language model","author":"bengio","year":"2003","journal-title":"J Mach Learn Res"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1162\/neco.1997.9.8.1735"},{"year":"2018","author":"radford","article-title":"Improving language understanding by generative pretraining","key":"ref22"},{"key":"ref21","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-65","article-title":"Lstm neural networks for language modeling","author":"sundermeyer","year":"2012","journal-title":"InterSpeech"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.18653\/v1\/P19-1580"},{"key":"ref23","article-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding","author":"wang","year":"2019","journal-title":"ICLRE"},{"year":"2019","author":"goldberg","article-title":"Assessing bert&#x2019;s syntactic abilities","key":"ref26"},{"year":"2019","author":"ettinger","article-title":"What bert is not: Lessons from a new suite of psycholinguistic diagnostics for language models","key":"ref25"}],"event":{"name":"2020 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2020,7,19]]},"location":"Glasgow, United Kingdom","end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9200848\/9206590\/09206645.pdf?arnumber=9206645","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,21]],"date-time":"2022-11-21T06:22:44Z","timestamp":1669011764000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9206645\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/ijcnn48605.2020.9206645","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}