{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T15:23:23Z","timestamp":1745508203354,"version":"3.40.3"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319990033"},{"type":"electronic","value":"9783319990040"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-319-99004-0_5","type":"book-chapter","created":{"date-parts":[[2019,2,6]],"date-time":"2019-02-06T17:11:22Z","timestamp":1549473082000},"page":"141-188","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Mapping and Aligning Units from Comparable Corpora"],"prefix":"10.1007","author":[{"given":"Ahmet","family":"Aker","sequence":"first","affiliation":[]},{"given":"Alexandru","family":"Ceau\u0219u","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Feng","sequence":"additional","affiliation":[]},{"given":"Robert","family":"Gaizauskas","sequence":"additional","affiliation":[]},{"given":"Sabine","family":"Hunsicker","sequence":"additional","affiliation":[]},{"given":"Radu","family":"Ion","sequence":"additional","affiliation":[]},{"given":"Elena","family":"Irimia","sequence":"additional","affiliation":[]},{"given":"Dan","family":"\u0218tef\u0103nescu","sequence":"additional","affiliation":[]},{"given":"Dan","family":"Tufi\u0219","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,2,7]]},"reference":[{"unstructured":"Aker, A., Kanoulas, E., & Gaizauskas, R. (2012a). A light way to collect comparable corpora from the Web. In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC 2012) (pp. 21\u201327), Istanbul, Turkey.","key":"5_CR1"},{"unstructured":"Aker, A., Feng, Y., & Gaizauskas, R. (2012b). Automatic bilingual phrase extraction from comparable corpora. In Proceedings of the 24th International Conference on Computational Linguistics (COLING 2012), IIT Bombay, Mumbai, India.","key":"5_CR2"},{"unstructured":"Aswani, N., & Gaizauskas, R. (2010). English-Hindi transliteration using multiple similarity metrics. In Proceedings of the 7th Language Resources and Evaluation Conference (LREC 2010), Valletta, Malta.","key":"5_CR3"},{"unstructured":"Borman, S. (2009). The expectation maximization algorithm. A short tutorial. http:\/\/www.seanborman.com\/publications\/EM_algorithm.pdf","key":"5_CR5"},{"issue":"2","key":"5_CR7","first-page":"263","volume":"19","author":"PF Brown","year":"1993","unstructured":"Brown, P. F., Pietra, S. A. D., Pietra, V. J. D., & Mercer, R. L. (1993). The mathematics of statistical machine translation: Parameter estimation. Computational Linguistics, 19(2), 263\u2013311.","journal-title":"Computational Linguistics"},{"unstructured":"Ceau\u015fu, A. (2009). Statistical machine translation for Romanian. PhD Thesis, Romanian Academy (in Romanian).","key":"5_CR9"},{"doi-asserted-by":"crossref","unstructured":"Chen, S. F.(1993). Aligning sentences in bilingual corpora using lexical information. In Proceedings of the 31st Annual Meeting on Association for Computational Linguistics (pp. 9\u201316), Columbus, OH.","key":"5_CR10","DOI":"10.3115\/981574.981576"},{"doi-asserted-by":"crossref","unstructured":"Chiang, D. (2005). A hierarchical phrase-based model for statistical machine translation. In Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics, June 2005 (pp. 263\u2013270), Ann Arbor, MI.","key":"5_CR11","DOI":"10.3115\/1219840.1219873"},{"volume-title":"WordNet: An electronic lexical database","year":"1998","unstructured":"Fellbaum, C. (Ed.) (1998) WordNet: An electronic lexical database. Cambridge, MA: MIT Press.","key":"5_CR13"},{"unstructured":"Fung, P., & Cheung, P. (2004). Mining very-non-parallel corpora: Parallel sentence and lexicon extraction via bootstrapping and EM. In Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing (EMNLP-2004) (pp. 57\u201363), Barcelona, Spain.","key":"5_CR14"},{"issue":"1","key":"5_CR15","first-page":"75","volume":"19","author":"WA Gale","year":"1993","unstructured":"Gale, W. A., & Church, K. W. (1993). A program for aligning sentences in bilingual corpora. Computational Linguistics, 19(1), 75\u2013102.","journal-title":"Computational Linguistics"},{"unstructured":"Gao, Q., & Vogel, S. (2008). Parallel implementations of a word alignment tool. In Proceedings of ACL-08 HLT: Software Engineering, Testing, and Quality Assurance for Natural Language Processing, June 20, 2008 (pp. 49\u201357), Ohio State University, Columbus, OH.","key":"5_CR16"},{"unstructured":"Hewavitharana, S., & Vogel, S. (2011). Extracting parallel phrases from comparable data. In Proceedings of the 4th Workshop on Building and Using Comparable Corpora: Comparable Corpora and the Web (BUCC 2011) (pp. 61\u201368), Portland, OR.","key":"5_CR17"},{"unstructured":"Ion, R. (2012). PEXACC: A parallel sentence mining algorithm from comparable corpora. In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC 2012) (pp. 2181\u20132188), May 21\u201327, 2012, Istanbul, Turkey.","key":"5_CR18"},{"unstructured":"Ion, R., Ceau\u015fu, A., & Irimia, E. (2011a). An expectation maximization algorithm for textual unit alignment. In Proceedings of the 4th Workshop on Building and Using Comparable Corpora (BUCC 2011) (pp. 128\u2013135), June 24th, 2011, Portland, OR.","key":"5_CR19"},{"unstructured":"Ion, R., Zhang, X., Su, F., Paramita, M., & \u0218tef\u0103nescu, D. (2011b). Report on Multi-Level Alignment of Comparable Corpora. Technical report no. D2.2 of the ACCURAT Project ( http:\/\/www.accurat-project.eu\/ ).","key":"5_CR20"},{"unstructured":"Koehn, P. (2004). Statistical significance tests for machine translation evaluation. In Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing (EMNLP-2004) (pp. 388\u2013395), Barcelona, Spain.","key":"5_CR22"},{"unstructured":"Koehn, P. (2005). Europarl: A parallel corpus for statistical machine translation. In Proceedings of the Tenth Machine Translation Summit, September 12\u201316, 2005 (pp. 79\u201486), Phuket, Thailand.","key":"5_CR23"},{"doi-asserted-by":"crossref","unstructured":"Koehn, P., Och, F., & Marcu, D. (2003). Statistical phrase-based translation. In Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology (pp. 48\u201354), May 27\u2013June 1, 2003, Edmonton, Canada.","key":"5_CR24","DOI":"10.3115\/1073445.1073462"},{"doi-asserted-by":"crossref","unstructured":"Koehn, P., Hoang, H., Birch, A., Callison-Burch, C., Federico, M., Cowan, B., et al. (2007). Moses: Open source toolkit for statistical machine translation. In Proceedings of the 45th Annual Meeting of the ACL Companion Volume Proceedings of the Demo and Poster Sessions (pp. 177\u2013180), Prague, Czech Republic.","key":"5_CR25","DOI":"10.3115\/1557769.1557821"},{"key":"5_CR27","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511809071","volume-title":"Introduction to information retrieval","author":"C Manning","year":"2008","unstructured":"Manning, C., Raghavan, P., & Schutze, H. (2008). Introduction to information retrieval (Vol. 1). Cambridge: Cambridge University Press."},{"unstructured":"Munteanu, D. S., & Marcu, D. (2002). Processing comparable corpora with bilingual suffix trees. In Proceedings of the 2002 Conference on Empirical Methods in Natural Language Processing (EMNLP 2002) (pp. 289\u2013295), July 6\u20137, 2002, University of Pennsylvania, Philadelphia, PA","key":"5_CR30"},{"issue":"4","key":"5_CR31","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1162\/089120105775299168","volume":"31","author":"DS Munteanu","year":"2005","unstructured":"Munteanu, D. S., & Marcu, D. (2005). Improving machine translation performance by exploiting non-parallel corpora. Computational Linguistics, 31(4), 477\u2013504.","journal-title":"Computational Linguistics"},{"doi-asserted-by":"crossref","unstructured":"Och, F. J. (2003). Minimum error rate training in statistical machine translation. Proceedings of the 41st Annual Meeting on Association for Computational Linguistics (pp. 160\u2013167), July 07\u201312, 2003, Sapporo, Japan.","key":"5_CR57","DOI":"10.3115\/1075096.1075117"},{"issue":"1","key":"5_CR34","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1162\/089120103321337421","volume":"29","author":"FJ Och","year":"2003","unstructured":"Och, F. J., & Ney, H. (2003). A systematic comparison of various statistical alignment models. Computational Linguistics, 29(1), 19\u201351.","journal-title":"Computational Linguistics"},{"issue":"4","key":"5_CR35","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1162\/0891201042544884","volume":"30","author":"FJ Och","year":"2004","unstructured":"Och, F. J., & Ney, H. (2004). The alignment template approach to statistical machine translation. Computational Linguistics, 30(4), 417\u2013449.","journal-title":"Computational Linguistics"},{"unstructured":"Papineni, K., Roukos, S., Ward, T., & Zhu, W. (2002). BLEU: A method for automatic evaluation of machine translation. In Proceedings of the 40th Annual Meeting on Association for Computational Linguistics, July 7\u201312 2002 (pp. 311\u2013318), University of Pennsylvania, Philadelphia, PA.","key":"5_CR36"},{"unstructured":"Quirk, C., Udupa, R., & Menezes, A. (2007). Generative models of noisy translations with applications to parallel fragment extraction. In Proceedings of the MT Summit XI (pp. 321\u2013327), September, 2007, Copenhagen, Demark.","key":"5_CR37"},{"issue":"4","key":"5_CR38","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1007\/s10590-011-9114-9","volume":"25","author":"SA Rauf","year":"2011","unstructured":"Rauf, S. A., & Schwenk, H. (2011). Parallel sentence generation from comparable corpora for improved SMT. Machine Translation, 25(4), 341\u2013375.","journal-title":"Machine Translation"},{"unstructured":"Skadi\u0146a, I., Aker, A., Giouli, V., Tufi\u015f, D., Gaizauskas, R., Mieri\u0146a, M., et al. (2010). A collection of comparable corpora for under-resourced languages. In Proceedings of the Fourth International Conference Baltic HLT 2010. Frontiers in Artificial Intelligence and Applications (Vol. 219, pp. 161\u2013168), IOS Press.","key":"5_CR40"},{"unstructured":"Snover, M., Dorr, B., Schwartz, R., Micciulla, L., & Makhoul, J. (2006). A study of translation edit rate with targeted human annotation. In Proceedings of the 7th Conference of the Association for Machine Translation in the Americas (AMTA 2006): Visions for the Future of Machine Translation (pp. 223\u2013231), Cambridge, MA.","key":"5_CR42"},{"unstructured":"Snover, M., Madnani, N., Dorr, B., & Schwartz, R. (2009). Fluency, adequacy, or HTER? Exploring different human judgments with a tunable MT metric. In Proceedings of the Fourth Workshop on Statistical Machine Translation (pp. 259\u2013268). Association for Computational Linguistics, Athens, Greece.","key":"5_CR43"},{"unstructured":"\u0218tef\u0103nescu, D., Ion, R., & Hunsicker, S. (2012). Hybrid parallel sentence mining from comparable corpora. In Proceedings of the16th Conference of the European Association for Machine Translation (EAMT 2012) (pp. 137\u2013144), May 28\u201330, 2012, Trento, Italy.","key":"5_CR44"},{"unstructured":"Steinberger, R., Pouliquen, B., Widiger, A., Ignat, C., Erjavec, T., Tufi\u0219, D., et al. (2006). The JRC-Acquis: A multilingual aligned parallel corpus with 20+ languages. In Proceedings of the 5th International Conference on Language Resources and Evaluation (LREC\u20192006), May 24\u201326, 2006, Genoa, Italy.","key":"5_CR45"},{"unstructured":"Steinberger, R., Eisele, A., Klocek, A., Pilos, S., & Schl\u00fcter, P. (2012). DGT-TM: A freely Available Translation Memory in 22 Languages. In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC\u20192012), May 21\u201327, 2012, Istanbul, Turkey.","key":"5_CR46"},{"doi-asserted-by":"crossref","unstructured":"Stolcke, A. (2002). SRILM \u2013 An extensible language modeling toolkit. In Proceedings of the International Conference of Spoken Language Processing (ICSLP 2002) (pp. 901\u2013904), September 2002, Denver, CO.","key":"5_CR47","DOI":"10.21437\/ICSLP.2002-303"},{"issue":"5","key":"5_CR48","doi-asserted-by":"publisher","first-page":"427","DOI":"10.1007\/s10791-008-9058-8","volume":"11","author":"T Talvensaari","year":"2008","unstructured":"Talvensaari, T., Pirkola, A., J\u00e4rvelin, K., Juhola, M., & Laurikkala, J. (2008). Focused web crawling in the acquisition of comparable corpora. Information Retrieval, 11(5), 427\u2013445.","journal-title":"Information Retrieval"},{"unstructured":"Thi Ngoc Diep, D., Besacier, L., Castelli, E. (2010). A fully unsupervised approach for mining parallel data from comparable corpora. In Proceedings of the 14th Annual Conference of the European Association for Machine Translation (EAMT 2010), May 27\u201328, 2010, Saint-Rapha\u00ebl, France.","key":"5_CR50"},{"doi-asserted-by":"crossref","unstructured":"Tillmann, C. (2009). A beam-search extraction algorithm for comparable data. In Proceedings of the ACL-IJCNLP 2009 Conference Short Papers (pp. 225\u2013228), Suntec, Singapore, August 4th, 2009.","key":"5_CR51","DOI":"10.3115\/1667583.1667653"},{"unstructured":"Tsvetkov, Y., & Wintner, S. (2010). Automatic acquisition of parallel corpora from websites with dynamic content. In Proceedings of the Seventh Conference on International Language Resources and Evaluation (LREC\u201910) (pp. 3389\u20133392), Valletta, Malta, May 2010.","key":"5_CR52"},{"unstructured":"Tufi\u0219, D., Ion, R., Ceau\u0219u, A., & \u0218tef\u0103nescu, D. (2006). Improved lexical alignment by combining multiple reified alignments. In Proceedings of the11th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2006) (pp. 153\u2013160), Trento, Italy, April 3\u20137 2006.","key":"5_CR53"},{"key":"5_CR54","first-page":"441","volume-title":"Proceedings of 4th Global WordNet Conference, GWC-2008, January 2008","author":"D Tufi\u0219","year":"2008","unstructured":"Tufi\u0219, D., Ion, R., Bozianu, L., Ceau\u0219u, A., & \u0218tef\u0103nescu, D. (2008). Romanian wordnet: Current state, new applications and prospects. In A. Tanacs, D. Csendes, V. Vincze, C. Fellbaum, & P. Vossen (Eds.), Proceedings of 4th Global WordNet Conference, GWC-2008, January 2008 (pp. 441\u2013452). Hungary: University of Szeged."},{"doi-asserted-by":"crossref","unstructured":"Zhang, Y., Wu, K., Gao, J., & Vines, P. (2006). Automatic acquisition of Chinese-English parallel corpus from the web. In Proceedings of 28th European Conference on Information Retrieval ECIR 2006, April 10\u201312, 2006, London.","key":"5_CR55","DOI":"10.1007\/11735106_37"}],"container-title":["Theory and Applications of Natural Language Processing","Using Comparable Corpora for Under-Resourced Areas of Machine Translation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-99004-0_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,11]],"date-time":"2022-09-11T13:34:24Z","timestamp":1662903264000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-99004-0_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783319990033","9783319990040"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-99004-0_5","relation":{},"ISSN":["2192-032X","2192-0338"],"issn-type":[{"type":"print","value":"2192-032X"},{"type":"electronic","value":"2192-0338"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"7 February 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}