{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T21:14:27Z","timestamp":1774300467669,"version":"3.50.1"},"publisher-location":"Stroudsburg, PA, USA","reference-count":0,"publisher":"Association for Computational Linguistics","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.18653\/v1\/2026.eacl-long.58","type":"proceedings-article","created":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T20:23:46Z","timestamp":1774297426000},"page":"1267-1283","source":"Crossref","is-referenced-by-count":0,"title":["Aleph-Alpha-GermanWeb: Improving German-language LLM pre-training with model-based data curation and synthetic data generation"],"prefix":"10.18653","author":[{"given":"Thomas F","family":"Burns","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Letitia","family":"Parcalabescu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stephan","family":"Waeldchen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Barlow","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gregor","family":"Ziegltrum","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Volker","family":"Stampa","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bastian","family":"Harren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bj\u00f6rn","family":"Deiseroth","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"1643","event":{"name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","location":"Rabat, Morocco","start":{"date-parts":[[2026,3]]},"end":{"date-parts":[[2026,3]]}},"container-title":["Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)"],"original-title":[],"deposited":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T20:25:07Z","timestamp":1774297507000},"score":1,"resource":{"primary":{"URL":"https:\/\/aclanthology.org\/2026.eacl-long.58"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":0,"URL":"https:\/\/doi.org\/10.18653\/v1\/2026.eacl-long.58","relation":{},"subject":[],"published":{"date-parts":[[2026]]}}}