{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T07:14:01Z","timestamp":1780384441644,"version":"3.54.1"},"publisher-location":"Stroudsburg, PA, USA","reference-count":0,"publisher":"Association for Computational Linguistics","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.18653\/v1\/2020.findings-emnlp.414","type":"proceedings-article","created":{"date-parts":[[2020,11,29]],"date-time":"2020-11-29T09:58:51Z","timestamp":1606643931000},"page":"4617-4624","source":"Crossref","is-referenced-by-count":85,"title":["Byte Pair Encoding is Suboptimal for Language Model Pretraining"],"prefix":"10.18653","author":[{"given":"Kaj","family":"Bostrom","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Greg","family":"Durrett","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"1643","event":{"name":"Findings of the Association for Computational Linguistics: EMNLP 2020","location":"Online","start":{"date-parts":[[2020,11]]},"end":{"date-parts":[[2020,11]]}},"container-title":["Findings of the Association for Computational Linguistics: EMNLP 2020"],"original-title":[],"deposited":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T18:38:59Z","timestamp":1769884739000},"score":1,"resource":{"primary":{"URL":"https:\/\/aclanthology.org\/2020.findings-emnlp.414"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":0,"URL":"https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.414","relation":{},"subject":[],"published":{"date-parts":[[2020]]}}}