{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T20:49:06Z","timestamp":1781383746925,"version":"3.54.1"},"publisher-location":"Berlin, Heidelberg","reference-count":20,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642201271","type":"print"},{"value":"9783642201288","type":"electronic"}],"license":[{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-20128-8_6","type":"book-chapter","created":{"date-parts":[[2013,12,13]],"date-time":"2013-12-13T12:15:18Z","timestamp":1386936918000},"page":"113-130","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Measuring the Distance Between Comparable Corpora Between Languages"],"prefix":"10.1007","author":[{"given":"Serge","family":"Sharoff","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2013,12,14]]},"reference":[{"key":"6_CR1","unstructured":"Adafre, S., de Rijke, M.: Finding Similar Sentences Across Multiple Languages in Wikipedia. In: Proceedings 11th EACL, pp. 62\u201369. Trento (2006)"},{"key":"6_CR2","unstructured":"Babych, B., Hartley, A.: Meta-Evaluation of Comparability Metrics using Parallel Corpora. In: Proceedings CICLING, (2011)"},{"key":"6_CR3","unstructured":"Baroni, M., Bernardini, S.: Bootcat: Bootstrapping Corpora and Terms from the Web. In: Proceedings of LREC2004. Lisbon (2004). http:\/\/sslmit.unibo.it\/~baroni\/publications\/lrec2004\/bootcat_lrec_2004.pdf"},{"issue":"3","key":"6_CR4","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1007\/s10579-009-9081-4","volume":"43","author":"M Baroni","year":"2009","unstructured":"Baroni, M., Bernardini, S., Ferraresi, A., Zanchetta, E.: The WaCky wide web: a collection of very large linguistically processed web-crawled corpora. Lang. Resour. Eval. 43(3), 209\u2013226 (2009)","journal-title":"Lang. Resour. Eval."},{"key":"6_CR5","unstructured":"Blancafort, H., Daille, B., Gornostay, T., Heid, U., Mechoulam, C., Sharoff, S.: TTC: Terminology Extraction, Translation Tools and Comparable Corpora. In: Proceedings EURALEX2010. Leeuwarden (5\u20136 July 2010)"},{"key":"6_CR6","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei, D.M., Ng, A.Y., Jordan, M.I.: Latent Dirichlet allocation. J. Mach. Learning Res. 3, 993\u20131022 (2003)","journal-title":"J. Mach. Learning Res."},{"key":"6_CR7","unstructured":"Chang, J., Boyd-Graber, J., Wang, C., Gerrish, S., Blei, D.M.: Reading Tea Leaves: How Humans Interpret Topic Models. In: Proceedings Neural Information Processing Systems (2009)"},{"key":"6_CR8","unstructured":"Eisele, A., Chen, Y.: MultiUN: A multilingual Corpus from United Nations Documents. In: Proceedings of the Seventh Conference on International Language Resources and Evaluation (LREC\u201910). Valletta, Malta (2010). http:\/\/www.euromatrixplus.net\/multi-un\/"},{"issue":"2","key":"6_CR9","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1145\/1041394.1041395","volume":"38","author":"H Joho","year":"2004","unstructured":"Joho, H., Sanderson, M.: The SPIRIT collection: an overview of a large web collection. SIGIR Forum 38(2), 57\u201361 (2004)","journal-title":"SIGIR Forum"},{"issue":"1","key":"6_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1075\/ijcl.6.1.01roc","volume":"6","author":"A Kilgarriff","year":"2001","unstructured":"Kilgarriff, A.: Comparing corpora. Int. J. Corpus Linguistics 6(1), 1\u201337 (2001)","journal-title":"Int. J. Corpus Linguistics"},{"key":"6_CR11","unstructured":"Koehn, P.: Europarl: A Parallel Corpus for Statistical Machine Translation. In: Proceedings MT Summit 2005 (2005). http:\/\/www.iccs.inf.ed.ac.uk\/pkoehn\/publications\/europarl-mtsummit05.pdf"},{"key":"6_CR12","unstructured":"Lee, D.: Genres, registers, text types, domains, and styles: clarifying the concepts and navigating a path through the BNC jungle. Lang. Learning Technol. 5(3), 37\u201372 (2001). http:\/\/llt.msu.edu\/vol5num3\/pdf\/lee.pdf"},{"key":"6_CR13","unstructured":"Li, B., Gaussier, E.: Improving Corpus Comparability for Bilingual Lexicon Extraction from Comparable Corpora. In: Proceedings COLING\u201910. Beijing, China (August 2010)"},{"issue":"1","key":"6_CR14","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1162\/089120103321337421","volume":"29","author":"FJ Och","year":"2003","unstructured":"Och, F.J., Ney, H.: A systematic comparison of various statistical alignment models. Comput. Linguistics 29(1), 19\u201351 (2003)","journal-title":"Comput. Linguistics"},{"key":"6_CR15","unstructured":"Rayson, P., Berridge, D., Francis, B.: Extending the Cochran Rule for the Comparison of Word Frequencies Between Corpora. In: Proceedings 7th International Conference on Statistical Analysis of Textual Data (JADT 2004), pp. 926\u2013936. Louvain-la-Neuve (2004)"},{"key":"6_CR16","unstructured":"Sharoff, S.: Creating general-purpose corpora using automated search engine queries. In: Baroni, M., Bernardini, S. (eds.) WaCky! Working Papers on the Web as Corpus. Gedit, Bologna (2006). http:\/\/wackybook.sslmit.unibo.it"},{"key":"6_CR17","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1007\/978-90-481-9178-9_7","volume-title":"Genres on the Web: Computational Models and Empirical Studies","author":"S Sharoff","year":"2010","unstructured":"Sharoff, S.: In the garden and in the jungle: Comparing genres in the BNC and Internet. In: Mehler, A., Sharoff, S., Santini, M. (eds.) Genres on the Web: Computational Models and Empirical Studies, pp. 149\u2013166. Springer, Berlin (2010)"},{"key":"6_CR18","unstructured":"Sharoff, S., Kopotev, M., Erjavec, T., Feldman, A., Divjak, D.: Designing and evaluating a Russian tagset. In: Proceedings of the Sixth Language Resources and Evaluation Conference, LREC 2008. Marrakech (2008)"},{"key":"6_CR19","unstructured":"Steinbach, M., Karypis, G., Kumar, V.: A Comparison of Document Clustering Techniques. In: KDD Workshop on Text Mining (2000)"},{"issue":"3","key":"6_CR20","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1023\/B:MACH.0000027785.44527.d6","volume":"55","author":"Y Zhao","year":"2004","unstructured":"Zhao, Y., Karypis, G.: Empirical and theoretical comparisons of selected criterion functions for document clustering. Machine Learning 55(3), 311\u2013331 (2004)","journal-title":"Machine Learning"}],"container-title":["Building and Using Comparable Corpora"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-20128-8_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,14]],"date-time":"2023-02-14T09:19:30Z","timestamp":1676366370000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-20128-8_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642201271","9783642201288"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-20128-8_6","relation":{},"subject":[],"published":{"date-parts":[[2013]]},"assertion":[{"value":"14 December 2013","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}