{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T06:37:40Z","timestamp":1742971060365,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":11,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642235375"},{"type":"electronic","value":"9783642235382"}],"license":[{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-3-642-23538-2_45","type":"book-chapter","created":{"date-parts":[[2011,8,27]],"date-time":"2011-08-27T07:11:37Z","timestamp":1314429097000},"page":"356-363","source":"Crossref","is-referenced-by-count":15,"title":["Web Text Data Mining for Building Large Scale Language Modelling Corpus"],"prefix":"10.1007","author":[{"given":"Jan","family":"\u0160vec","sequence":"first","affiliation":[]},{"given":"Jan","family":"Hoidekr","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Soutner","sequence":"additional","affiliation":[]},{"given":"Jan","family":"Vavru\u0161ka","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"45_CR1","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1007\/3-540-45323-7_44","volume-title":"Text, Speech and Dialogue","author":"L. M\u00fcller","year":"2000","unstructured":"M\u00fcller, L., Psutka, J., \u0160m\u00eddl, L.: Design of speech recognition engine. In: Sojka, P., Kope\u010dek, I., Pala, K. (eds.) TSD 2000. LNCS (LNAI), vol.\u00a01902, pp. 259\u2013264. Springer, Heidelberg (2000)"},{"key":"45_CR2","doi-asserted-by":"crossref","unstructured":"Seymore, K., Rosenfeld, R.: Using story topics for language model adaptation. In: Proc. Eurospeech, vol.\u00a097, pp. 1987\u20131990 (1997)","DOI":"10.21437\/Eurospeech.1997-527"},{"key":"45_CR3","doi-asserted-by":"crossref","unstructured":"Bulyko, I., Ostendorf, M., Siu, M., Ng, T., Stolcke, A., \u00c7etin, O.: Web resources for language modeling in conversational speech recognition. ACM Trans. Speech Lang. Process.\u00a05 (2007)","DOI":"10.1145\/1322391.1322392"},{"key":"45_CR4","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1007\/978-3-540-74628-7_43","volume-title":"Text, Speech and Dialogue","author":"J. Matou\u0161ek","year":"2007","unstructured":"Matou\u0161ek, J., Romportl, J.: Recording and annotation of speech corpus for Czech unit selection speech synthesis. In: Matou\u0161ek, V., Mautner, P. (eds.) TSD 2007. LNCS (LNAI), vol.\u00a04629, pp. 326\u2013333. Springer, Heidelberg (2007)"},{"key":"45_CR5","unstructured":"Spoustov\u00e1, D., Spousta, M., Pecina, P.: Building a Web Corpus of Czech. In: Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC 2010), Valletta, Malta (2010)"},{"key":"45_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"416","DOI":"10.1007\/978-3-642-15760-8_53","volume-title":"Text, Speech and Dialogue","author":"J. Trmal","year":"2010","unstructured":"Trmal, J., Pra\u017e\u00e1k, A., Loose, Z., Psutka, J.: Online TV Captioning of Czech Parliamentary Sessions. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2010. LNCS, vol.\u00a06231, pp. 416\u2013422. Springer, Heidelberg (2010)"},{"key":"45_CR7","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1007\/11551874_42","volume-title":"Text, Speech and Dialogue","author":"J. Zelinka","year":"2005","unstructured":"Zelinka, J., Kanis, J., M\u00fcller, L.: Automatic transcription of numerals in inflectional languages. In: Matou\u0161ek, V., Mautner, P., Pavelka, T. (eds.) TSD 2005. LNCS (LNAI), vol.\u00a03658, pp. 326\u2013333. Springer, Heidelberg (2005)"},{"issue":"8-13","key":"45_CR8","doi-asserted-by":"publisher","first-page":"1157","DOI":"10.1016\/S0169-7552(97)00031-7","volume":"29","author":"A.Z. Broder","year":"1997","unstructured":"Broder, A.Z., Glassman, S.C., Manasse, M.S., Zweig, G.: Syntactic clustering of the web. Computer Networks and ISDN Systems\u00a029(8-13), 1157\u20131166 (1997)","journal-title":"Computer Networks and ISDN Systems"},{"key":"45_CR9","unstructured":"Malkin, M., Venkatesan, R.: Comparison of texts streams in the presence of mild adversaries. In: Proceedings of the 2005 Australasian Workshop on Grid Computing and e-research, ACSW Frontiers 2005, vol.\u00a044, pp. 179\u2013186. Australian Computer Society, Inc. (2005)"},{"key":"45_CR10","series-title":"LNCS(LNAI)","first-page":"64","volume-title":"TDS 2011","author":"L. Skorkovsk\u00e1","year":"2011","unstructured":"Skorkovsk\u00e1, L., Ircing, P., Pra\u017e\u00e1k, A., Lehe\u010dka, J.: Automatic topic identification for large scale language modeling data filtering. In: Habernal, I., Matou\u0161ek, V. (eds.) TDS 2011. LNCS(LNAI), vol.\u00a06836, pp. 64\u201371. Springer, Heidelberg (2011)"},{"issue":"1","key":"45_CR11","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1075\/ijcl.6.1.05kil","volume":"6","author":"A. Kilgarriff","year":"2001","unstructured":"Kilgarriff, A.: Comparing corpora. International journal of corpus linguistics\u00a06(1), 97\u2013133 (2001)","journal-title":"International journal of corpus linguistics"}],"container-title":["Lecture Notes in Computer Science","Text, Speech and Dialogue"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-23538-2_45","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,9]],"date-time":"2023-06-09T01:09:53Z","timestamp":1686272993000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-23538-2_45"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9783642235375","9783642235382"],"references-count":11,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-23538-2_45","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2011]]}}}