{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T04:18:47Z","timestamp":1769573927402,"version":"3.49.0"},"publisher-location":"Cham","reference-count":17,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319108155","type":"print"},{"value":"9783319108162","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-10816-2_31","type":"book-chapter","created":{"date-parts":[[2014,9,1]],"date-time":"2014-09-01T05:25:37Z","timestamp":1409549137000},"page":"247-256","source":"Crossref","is-referenced-by-count":38,"title":["Aranea: Yet\u00a0Another\u00a0Family\u00a0of\u00a0(Comparable)\u00a0Web\u00a0Corpora"],"prefix":"10.1007","author":[{"given":"Vladim\u00edr","family":"Benko","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"31_CR1","unstructured":"Baroni, B., Bernardini, S.: BootCaT: Bootstrapping corpora and terms from the web. In: Proc. 4th Int. Conf. on Language Resources and Evaluation, Lisbon (2004)"},{"issue":"3","key":"31_CR2","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1007\/s10579-009-9081-4","volume":"43","author":"M. Baroni","year":"2009","unstructured":"Baroni, M., Bernardini, S., Ferraresi, A., Zanchetta, E.: The WaCky Wide Web: A Collection of Very Large Linguistically Processed Web-Crawled Corpora. Language Resources and Evaluation\u00a043(3), 209\u2013226 (2009)","journal-title":"Language Resources and Evaluation"},{"key":"31_CR3","first-page":"27","volume-title":"Slovko 2013: Natural Language Processing, Corpus Linguistics, E-learning","author":"V. Benko","year":"2013","unstructured":"Benko, V.: Data Deduplication in Slovak Corpora. In: Slovko 2013: Natural Language Processing, Corpus Linguistics, E-learning, pp. 27\u201339. RAM-Verlag, L\u00fcdenscheid (2013)"},{"key":"31_CR4","unstructured":"Benko, V.: Compatible Sketch Grammars for Comparable Corpora. In: Proc. XVI EURALEX Int. Congress, Bolzano (in print, 2014)"},{"key":"31_CR5","doi-asserted-by":"crossref","unstructured":"Garab\u00edk, R., \u0160imkov\u00e1, M.: Slovak Morphosyntactic Tagset. Journal of Language Modelling\u00a0(1), 41\u201363 (2012)","DOI":"10.15398\/jlm.v0i1.35"},{"key":"31_CR6","unstructured":"Grefenstette, G.: Generating resources for the lexicography of under-resourced languages. Invited lecture at eLex 2013 Int. Conference, Tallinn (2013)"},{"key":"31_CR7","volume-title":"Disambiguation of Rich Inflection (Computational Morphology of Czech)","author":"J. Haji\u010d","year":"2004","unstructured":"Haji\u010d, J.: Disambiguation of Rich Inflection (Computational Morphology of Czech). Karolinum, Praha (2004)"},{"key":"31_CR8","unstructured":"Jakub\u00ed\u010dek, M., Kilgarriff, A., Kov\u00e1\u0159, V., Rychl\u00fd, P., Suchomel, V.: The TenTen Corpus Family. In: Proc. Int. Conf. on Corpus Linguistics, Lancaster (2013)"},{"issue":"1","key":"31_CR9","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1075\/ijcl.6.1.05kil","volume":"6","author":"A. Kilgarriff","year":"2001","unstructured":"Kilgarriff, A.: Comparing Corpora. International Journal of Corpus Linguistics\u00a06(1), 97\u2013133 (2001)","journal-title":"International Journal of Corpus Linguistics"},{"key":"31_CR10","unstructured":"Kilgarriff, A., Rychl\u00fd, P., Smr\u017e, P., Tugwell, D.: The Sketch Engine. In: Proc. XI EURALEX Int. Congress, Lorient, pp. 105\u2013116 (2004)"},{"key":"31_CR11","unstructured":"Petrov, S., Das, D., McDonald, R.: A Universal Part-of-Speech Tagset. In: Proc. 8th Int. Conf. on Language Resources and Evaluation, Istanbul (2012)"},{"key":"31_CR12","first-page":"151","volume":"11","author":"M. Piasecki","year":"2007","unstructured":"Piasecki, M.: Polish Tagger TaKIPI: Rule Based Construction and Optimisation. Task Quarterly\u00a011, 151\u2013167 (2007)","journal-title":"Task Quarterly"},{"key":"31_CR13","unstructured":"Pomik\u00e1lek, J.: Removing Boilerplate and Duplicate Content from Web Corpora. Ph.D. thesis, Masaryk University, Brno (2011)"},{"key":"31_CR14","first-page":"65","volume-title":"1st Workshop on Recent Advances in Slavonic Natural Language Processing","author":"P. Rychl\u00fd","year":"2007","unstructured":"Rychl\u00fd, P.: Manatee\/Bonito \u2013 A Modular Corpus Manager. In: 1st Workshop on Recent Advances in Slavonic Natural Language Processing, pp. 65\u201370. Masaryk University, Brno (2007)"},{"key":"31_CR15","doi-asserted-by":"crossref","unstructured":"Sch\u00e4fer, R., Bildhauer, F.: Web Corpus Construction. Synthesis Lectures on Human Language Technologies. Morgan & Claypool Publishers (2013)","DOI":"10.2200\/S00508ED1V01Y201305HLT022"},{"key":"31_CR16","unstructured":"Schmid, H.: Probabilistic Part-of-Speech Tagging Using Decision Trees. In: Proceedings of International Conference on New Methods in Language Processing, Manchester (1994)"},{"key":"31_CR17","unstructured":"Suchomel, V., Pomik\u00e1lek, J.: Efficient Web Crawling for Large Text Corpora. In: 7th Web as Corpus Workshop (WAC-7), Lyon, France (2012)"}],"container-title":["Lecture Notes in Computer Science","Text, Speech and Dialogue"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-10816-2_31","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,27]],"date-time":"2019-05-27T18:39:21Z","timestamp":1558982361000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-10816-2_31"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319108155","9783319108162"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-10816-2_31","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014]]}}}