{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T05:07:12Z","timestamp":1747890432841,"version":"3.41.0"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319181103"},{"type":"electronic","value":"9783319181110"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-18111-0_48","type":"book-chapter","created":{"date-parts":[[2015,4,9]],"date-time":"2015-04-09T12:00:29Z","timestamp":1428580829000},"page":"633-643","source":"Crossref","is-referenced-by-count":6,"title":["Language Set Identification in Noisy Synthetic Multilingual Documents"],"prefix":"10.1007","author":[{"given":"Tommi","family":"Jauhiainen","sequence":"first","affiliation":[]},{"given":"Krister","family":"Lind\u00e9n","sequence":"additional","affiliation":[]},{"given":"Heidi","family":"Jauhiainen","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"48_CR1","unstructured":"Jauhiainen, T.: Tekstin kielen automaattinen tunnistaminen. Master\u2019s thesis, University of Helsinki, Helsinki (2010)"},{"key":"48_CR2","unstructured":"Jauhiainen, T., Lindn, K.: Identifying the language of digital text (2015) (in review, submitted August 14, 2015)"},{"key":"48_CR3","unstructured":"Ludovik, Y., Zacharski, R.: Multilingual document language recognition for creating corpora. Technical report, New Mexico State University (1999)"},{"key":"48_CR4","unstructured":"Prager, J.M.: Linguini: Language identification for multilingual documents. In: Proceedings of the 32nd Annual Hawaii International Conference on System Sciences, Maui (1999)"},{"key":"48_CR5","unstructured":"Ozbek, G., Rosenn, I., Yeh, E.: Language classification in multilingual documents. Technical report, Stanford University (2006)"},{"key":"48_CR6","doi-asserted-by":"crossref","unstructured":"Mandl, T., Shramko, M., Tartakovski, O., Womser-Hacker, C.: Language identification in multi-lingual web-documents. In: Natural Language Processing and Information Systems. Proceedings of the 11th International Conference on Applications of Natural Language to Information Systems, Klagenfurt, pp. 153\u2013163 (2006)","DOI":"10.1007\/11765448_14"},{"key":"48_CR7","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1080\/09296170500500694","volume":"13","author":"K.N. Murthy","year":"2006","unstructured":"Murthy, K.N., Kumar, G.B.: Language identification from small text samples. Journal of Quantitative Linguistics\u00a013, 57\u201380 (2006)","journal-title":"Journal of Quantitative Linguistics"},{"key":"48_CR8","unstructured":"Hughes, B., Baldwin, T., Bird, S., Nicholson, J., MacKinlay, A.: Reconsidering language identification for written language resources. In: Proceedings of the International Conference on Language Resources and Evaluation, Genoa, pp. 485\u2013488 (2006)"},{"key":"48_CR9","doi-asserted-by":"crossref","unstructured":"King, L., Kbler, S., Hooper, W.: Word-level language identification in The Chymistry of Isaac Newton. Literary and Linguistic Computing (2014)","DOI":"10.1093\/llc\/fqu032"},{"key":"48_CR10","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1162\/tacl_a_00163","volume":"2","author":"M. Lui","year":"2014","unstructured":"Lui, M., Lau, J.H., Baldwin, T.: Automatic detection and language identification of multilingual documents. Transactions of the Association for Computational Linguistics\u00a02, 27\u201340 (2014)","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"48_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1007\/978-3-642-14980-1_59","volume-title":"Structural, Syntactic, and Statistical Pattern Recognition","author":"A. Stensby","year":"2010","unstructured":"Stensby, A., Oommen, B.J., Granmo, O.-C.: Language detection and tracking in multilingual documents using weak estimators. In: Hancock, E.R., Wilson, R.C., Windeatt, T., Ulusoy, I., Escolano, F. (eds.) SSPR&SPR 2010. LNCS, vol.\u00a06218, pp. 600\u2013609. Springer, Heidelberg (2010)"},{"key":"48_CR12","unstructured":"Giguet, E.: Multilingual sentence categorization according to language. In: Proceedings of the European Chapter of the Association for Computational Linguistics SIGDAT Workshop \u201dFrom text to tags: Issues in Multilingual Language Analysis\u201d, Dublin, pp. 73\u201376 (1995)"},{"key":"48_CR13","unstructured":"Teahan, W.J.: Text classification and segmentation using minimum cross-entropy. In: Proceedings of the 6th International Conference Recherche Information Assistee par Ordinateur Paris, pp. 943\u2013961 (2000)"},{"key":"48_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1007\/978-3-642-00382-0_29","volume-title":"Computational Linguistics and Intelligent Text Processing","author":"R. \u0158eh\u016f\u0159ek","year":"2009","unstructured":"\u0158eh\u016f\u0159ek, R., Kolkus, M.: Language identification on the web: Extending the dictionary method. In: Gelbukh, A. (ed.) CICLing 2009. LNCS, vol.\u00a05449, pp. 357\u2013368. Springer, Heidelberg (2009)"},{"key":"48_CR15","doi-asserted-by":"publisher","first-page":"697","DOI":"10.1016\/j.specom.2007.04.006","volume":"49","author":"H. Romsdorfer","year":"2007","unstructured":"Romsdorfer, H., Pfister, B.: Text analysis and language identification for polyglot text-to-speech synthesis. Speech Communication\u00a049, 697\u2013724 (2007)","journal-title":"Speech Communication"},{"key":"48_CR16","unstructured":"Romsdorfer, H.: Polyglot text-to-speech synthesis. PhD thesis, Swiss Federal Institute of Technology, Z\u00fcrich (2009)"},{"key":"48_CR17","doi-asserted-by":"crossref","unstructured":"Nguyen, D., Dogruz, A.S.: Word level language identification in online multilingual communication. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, Seattle, pp. 857\u2013862 (2013)","DOI":"10.18653\/v1\/D13-1084"},{"key":"48_CR18","unstructured":"Yamaguchi, H., Tanaka-Ishii, K.: Text segmentation by language using minimum description length. In: Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics: Long Papers, Jeju Island, vol.\u00a01, pp. 969\u2013978 (2012)"},{"key":"48_CR19","unstructured":"King, B., Abney, S.: Labeling the languages of words in mixed-language documents using weakly supervised methods. In: Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Atlanta, pp. 1110\u20131119 (2013)"},{"key":"48_CR20","unstructured":"Ullman, E.: Shibboleth - a multilingual language identifier. Master\u2019s thesis, Uppsala University, Uppsala (2014)"},{"key":"48_CR21","unstructured":"Cavnar, W.B., Trenkle, J.M.: N-gram-based text categorization. In: Proceedings of 3rd Annual Symposium on Document Analysis and Information Retrieval, SDAIR 1994, Las Vegas, pp. 161\u2013175 (1994)"}],"container-title":["Lecture Notes in Computer Science","Computational Linguistics and Intelligent Text Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-18111-0_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T02:53:29Z","timestamp":1747882409000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-18111-0_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319181103","9783319181110"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-18111-0_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]}}}