{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:23:39Z","timestamp":1760315019931,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032079589","type":"print"},{"value":"9783032079596","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-07959-6_20","type":"book-chapter","created":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T09:22:23Z","timestamp":1760260943000},"page":"271-285","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Estimation of the Genre Composition of the English Subcorpus of the Google Books Ngram"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8792-1491","authenticated-orcid":false,"given":"Vladimir","family":"Bochkarev","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6558-3521","authenticated-orcid":false,"given":"Andrey A.","family":"Achkeev","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2659-1887","authenticated-orcid":false,"given":"Anna","family":"Shevlyakova","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,13]]},"reference":[{"key":"20_CR1","unstructured":"Google Books Ngram Viewer. https:\/\/books.google.com\/ngrams\/. Accessed 30 June 2025"},{"key":"20_CR2","unstructured":"Lin, Y., Michel, J.-B., Aiden, E.L., Orwant, J., Brockman, W., Petrov, S.: Syntactic annotations for the Google Books Ngram corpus. In: Li, H., Lin, C.-Y., Osborne, M., Lee, G.G., Park, J.C. (eds.) 50th Annual Meeting of the Association for Computational Linguistics 2012, Proceedings of the Conference, vol. 2, pp. 238\u2013242. Association for Computational Linguistics, Jeju Island, Korea (2012)"},{"issue":"6014","key":"20_CR3","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1126\/science.1199644","volume":"331","author":"J-B Michel","year":"2011","unstructured":"Michel, J.-B., Shen, Y.K., Aiden, A.P., Veres, A., Gray, M.K., et al.: Quantitative analysis of culture using millions of digitized books. Science 331(6014), 176\u2013182 (2011)","journal-title":"Science"},{"issue":"2","key":"20_CR4","doi-asserted-by":"publisher","first-page":"144","DOI":"10.30884\/seh\/2024.02.06","volume":"23","author":"V Solovyev","year":"2024","unstructured":"Solovyev, V.: Using the Google Books Ngram corpus to study social evolution. Soc. Evol. Hist. 23(2), 144\u2013164 (2024)","journal-title":"Soc. Evol. Hist."},{"issue":"10","key":"20_CR5","doi-asserted-by":"publisher","first-page":"e0137041","DOI":"10.1371\/journal.pone.0137041","volume":"10","author":"EA Pechenick","year":"2015","unstructured":"Pechenick, E.A., Danforth, C.M., Dodds, P.S.: Characterizing the Google books corpus: strong limits to inferences of socio-cultural and linguistic evolution. PLoS ONE 10(10), e0137041 (2015). https:\/\/doi.org\/10.1371\/journal.pone.0137041","journal-title":"PLoS ONE"},{"key":"20_CR6","doi-asserted-by":"publisher","first-page":"17","DOI":"10.17223\/18137083\/56\/2","volume":"3","author":"VI Belikov","year":"2016","unstructured":"Belikov, V.I.: What and how can a linguist get from digitized texts? Siberian J. Philol. 3, 17\u201334 (2016). (in Russian)","journal-title":"Siberian J. Philol."},{"issue":"1","key":"20_CR7","first-page":"169","volume":"32","author":"A Koplenig","year":"2017","unstructured":"Koplenig, A.: The impact of lacking metadata for the measurement of cultural and linguistic change using the Google Ngram data sets - reconstructing the composition of the German corpus in times of WWII. Digit. Scholarsh. Humanit. 32(1), 169\u2013188 (2017)","journal-title":"Digit. Scholarsh. Humanit."},{"key":"20_CR8","doi-asserted-by":"publisher","unstructured":"Solovyev, V.D., Bochkarev, V.V., Akhtyamova, S.S.: Google Books Ngram: problems of representativeness and data reliability. In: Elizarov, A., Novikov, B., Stupnikov, S. (eds.) Data Analytics and Management in Data Intensive Domains, DAMDID\/RCDL 2019. CCIS, vol. 1223, pp. 147\u2013162. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-51913-1_10","DOI":"10.1007\/978-3-030-51913-1_10"},{"key":"20_CR9","doi-asserted-by":"publisher","unstructured":"Solovyev, V., Ivleva, A.: How to detect imbalances in the Google Books Ngram corpus? In: Karpov, A., Deli\u0107, V. (eds.) Speech and Computer, SPECOM 2024. LNCS, vol. 15300, pp. 334\u2013348. Springer, Cham. (2025). https:\/\/doi.org\/10.1007\/978-3-031-78014-1_25","DOI":"10.1007\/978-3-031-78014-1_25"},{"issue":"1","key":"20_CR10","doi-asserted-by":"publisher","first-page":"537","DOI":"10.1007\/s10579-023-09695-8","volume":"59","author":"T Kuzman","year":"2025","unstructured":"Kuzman, T., Ljube\u0161i\u0107, N.: Automatic genre identification: a survey. Lang. Resour. Eval. 59(1), 537\u2013570 (2025)","journal-title":"Lang. Resour. Eval."},{"issue":"4","key":"20_CR11","doi-asserted-by":"publisher","first-page":"949","DOI":"10.1007\/s10579-018-9418-y","volume":"52","author":"D Pritsos","year":"2018","unstructured":"Pritsos, D., Stamatatos, E.: Open set evaluation of web genre identification. Lang. Resour. Eval. 52(4), 949\u2013968 (2018)","journal-title":"Lang. Resour. Eval."},{"issue":"1","key":"20_CR12","first-page":"3","volume":"2","author":"D Biber","year":"2015","unstructured":"Biber, D., Egbert, J.: Using grammatical features for automatic register identification in an unrestricted corpus of documents from the open web. J. Res. Des. Stat. Linguist. Commun. Sci. 2(1), 3\u201336 (2015)","journal-title":"J. Res. Des. Stat. Linguist. Commun. Sci."},{"key":"20_CR13","doi-asserted-by":"publisher","first-page":"233","DOI":"10.13053\/rcs-70-1-18","volume":"70","author":"PN Priyatam","year":"2013","unstructured":"Priyatam, P.N., Iyengar, S., Perumal, K., Varma, V.: Don\u2019t use a lot when little will do: genre identification using URLs. Res. Comput. Sci. 70, 233\u2013243 (2013)","journal-title":"Res. Comput. Sci."},{"key":"20_CR14","unstructured":"Laippala, V., Kyll\u00f6nen, R., Egbert, J., Biber, D., Pyysalo, S.: Toward multilingual identification of online registers. In: Proceedings of the 22nd Nordic Conference on Computational Linguistics, Turku, Finland, pp. 292\u2013297. Link\u00f6ping University Electronic Press (2019)"},{"key":"20_CR15","unstructured":"Kuzman, T., Pollak, S.: Assessing comparability of genre datasets via cross-lingual and cross-dataset experiments. In: Fi\u0161er, D., Erjavec, T. (eds.) Jezikovne tehnologije in digitalna humanistika: Zbornik conference, pp. 100\u2013107. Institute of Contemporary History (2022)"},{"issue":"7","key":"20_CR16","doi-asserted-by":"publisher","first-page":"817","DOI":"10.3103\/S0146411623070076","volume":"57","author":"KV Lagutina","year":"2023","unstructured":"Lagutina, K.V.: Genre classification of Russian texts based on modern embeddings and rhythm. Autom. Control. Comput. Sci. 57(7), 817\u2013827 (2023)","journal-title":"Autom. Control. Comput. Sci."},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Repo, L., et al.: Beyond the English web: zero-shot cross-lingual and lightweight monolingual classification of registers. In: 16th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop, pp. 183\u2013191. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.eacl-srw.24"},{"key":"20_CR18","doi-asserted-by":"publisher","first-page":"1149","DOI":"10.3390\/make5030059","volume":"5","author":"T Kuzman","year":"2023","unstructured":"Kuzman, T., Mozeti\u010d, I., Ljube\u0161i\u0107, N.: Automatic genre identification for robust enrichment of massive text collections: investigation of classification methods in the era of large language models. Mach. Learn. Knowl. Extr. 5, 1149\u20131175 (2023). https:\/\/doi.org\/10.3390\/make5030059","journal-title":"Mach. Learn. Knowl. Extr."},{"key":"20_CR19","unstructured":"Vajjala, S., Shimangaud, S.: Text classification in the LLM era - where do we stand? arXiv preprint arXiv:2502.11830 (2025)"},{"key":"20_CR20","unstructured":"Corpus of Historical American English. https:\/\/www.english-corpora.org\/coha\/. Accessed 30 June 2025"},{"issue":"2","key":"20_CR21","doi-asserted-by":"publisher","first-page":"121","DOI":"10.3366\/cor.2012.0024","volume":"7","author":"M Davies","year":"2012","unstructured":"Davies, M.: Expanding horizons in historical linguistics with the 400-million word Corpus of Historical American English. Corpora 7(2), 121\u2013157 (2012). https:\/\/doi.org\/10.3366\/cor.2012.0024","journal-title":"Corpora"},{"issue":"3","key":"20_CR22","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1093\/biomet\/68.3.589","volume":"68","author":"B Efron","year":"1981","unstructured":"Efron, B.: Nonparametric estimates of standard error: the Jackknife, the bootstrap and other methods Get access Arrow. Biometrika 68(3), 589\u2013599 (1981). https:\/\/doi.org\/10.1093\/biomet\/68.3.589","journal-title":"Biometrika"},{"key":"20_CR23","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. Adaptive Computation and Machine Learning. MIT Press (2016)"},{"key":"20_CR24","unstructured":"Abadi, M., et al.: Large-scale machine learning on heterogeneous distributed systems. arXiv preprint, arXiv:1603.04467 (2016)"},{"key":"20_CR25","unstructured":"Chollet, F.: Keras (n.d.). https:\/\/keras.io. Accessed 30 June 2025"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-07959-6_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T09:22:27Z","timestamp":1760260947000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-07959-6_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,13]]},"ISBN":["9783032079589","9783032079596"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-07959-6_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,13]]},"assertion":[{"value":"13 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Szeged","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hungary","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/specom.inf.u-szeged.hu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}