{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:36:58Z","timestamp":1740123418218,"version":"3.37.3"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,2,10]],"date-time":"2017-02-10T00:00:00Z","timestamp":1486684800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"ModernMT EU Project","award":["H2020 grant agreement no. 645487"],"award-info":[{"award-number":["H2020 grant agreement no. 645487"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Translation"],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1007\/s10590-017-9191-5","type":"journal-article","created":{"date-parts":[[2017,2,10]],"date-time":"2017-02-10T00:59:14Z","timestamp":1486688354000},"page":"93-115","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Automatic translation memory cleaning"],"prefix":"10.1007","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8811-4330","authenticated-orcid":false,"given":"Matteo","family":"Negri","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Duygu","family":"Ataman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masoud Jalili","family":"Sabet","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marco","family":"Turchi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marcello","family":"Federico","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,2,10]]},"reference":[{"issue":"4","key":"9191_CR1","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1007\/s10590-011-9114-9","volume":"25","author":"S Abdul Rauf","year":"2011","unstructured":"Abdul Rauf S, Schwenk H (2011) Parallel sentence generation from comparable corpora for improved SMT. Mach Transl 25(4):341\u2013375","journal-title":"Mach Transl"},{"key":"9191_CR2","unstructured":"Arthern P (1979) Machine translation and computerized terminology systems: a translator\u2019s viewpoint. In: Translating and the computer, proceedings of a seminar, London, UK, pp 77\u2013108"},{"key":"9191_CR3","unstructured":"Barbu E (2015) Spotting false translation segments in translation memories. In: Proceedings of the workshop on natural language processing for translation memories, Hissar, Bulgaria, pp 9\u201316"},{"key":"9191_CR4","unstructured":"Barbu E, Parra\u00a0Escart\u00edn C, Bentivogli L, Negri M, Turchi M, Federico M, Mastrostefano L, Orasan C (2016) 1st shared task on automatic translation memory cleaning. In: Proceedings of the 2nd Workshop on natural language processing for translation memories (NLP4TM 2016). Portoro\u017e, Slovenia, pp 1\u20135"},{"key":"9191_CR5","doi-asserted-by":"crossref","unstructured":"Bi\u00e7ici E, Dymetman M (2008) Dynamic translation memory: using statistical machine translation to improve translation memory fuzzy matches. In: Proceedings of the 9th international conference on computational linguistics and intelligent text processing, CICLing\u201908, Haifa, Israel, pp 454\u2013465","DOI":"10.1007\/978-3-540-78135-6_39"},{"key":"9191_CR6","doi-asserted-by":"crossref","unstructured":"Bloodgood M, Strauss B (2014) Translation memory retrieval methods. In: Proceedings of the 14th conference of the European chapter of the association for computational linguistics, Gothenburg, Sweden, pp\u00a0202\u2013210","DOI":"10.3115\/v1\/E14-1022"},{"issue":"2","key":"9191_CR7","first-page":"263","volume":"19","author":"PF Brown","year":"2003","unstructured":"Brown PF, Della Pietra SA, Della Pietra VJ, Mercer RL (2003) The mathematics of statistical machine translation: parameter estimation. Comput Linguist 19(2):263\u2013311","journal-title":"Comput Linguist"},{"key":"9191_CR8","unstructured":"Burchardt A, Lommel A (2014) Practical guidelines for the use of MQM in scientific research on translation quality. Technical report, DFKI, Berlin, Germany"},{"key":"9191_CR9","unstructured":"Camargo\u00a0de Souza JG, Buck C, Turchi M, Negri M (2013) FBK-UEdin participation to the WMT13 quality estimation shared task. In: Proceedings of the eighth workshop on statistical machine translation, Sofia, Bulgaria, pp\u00a0352\u2013358"},{"key":"9191_CR10","unstructured":"Chatzitheodoroou K (2015) Improving translation memory fuzzy matching by paraphrasing. In: Proceedings of the workshop on natural language processing for translation memories, Hissar, Bulgaria, pp\u00a024\u201330"},{"key":"9191_CR11","unstructured":"Chu C, Nakazawa T, Kurohashi S (2013) Chinese\u2013Japanese parallel sentence extraction from quasi\u2013comparable corpora. In: Proceedings of the sixth workshop on building and using comparable corpora, Sofia, Bulgaria, pp\u00a034\u201342"},{"key":"9191_CR12","doi-asserted-by":"crossref","unstructured":"Cotterell R, Sch\u00fctze H, Eisner J (2016) Morphological smoothing and extrapolation of word embeddings. In: Proceedings of the 54th annual meeting of the association for computational linguistics (volume 1: long papers), Berlin, Germany, pp\u00a01651\u20131660","DOI":"10.18653\/v1\/P16-1156"},{"key":"9191_CR13","unstructured":"Denkowski M, Hanneman G, Lavie A (2012) The CMU-avenue French\u2013English translation system. In: Proceedings of the seventh workshop on statistical machine translation, Montr\u00e9al, Canada, pp\u00a0261\u2013266"},{"key":"9191_CR14","unstructured":"Dyer C, Clark J, Lavie A, Smith NA (2011) Unsupervised word alignment with arbitrary features. In: Proceedings of the 49th annual meeting of the association for computational linguistics: human language technologies-volume 1, Portland, Oregon, USA, pp\u00a0409\u2013419"},{"issue":"3\u20134","key":"9191_CR15","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1007\/s10590-015-9176-1","volume":"29","author":"S Eetemadi","year":"2015","unstructured":"Eetemadi S, Lewis W, Toutanova K, Radha H (2015) Survey of data-selection methods in statistical machine translation. Mach Transl 29(3\u20134):189\u2013223","journal-title":"Mach Transl"},{"key":"9191_CR16","doi-asserted-by":"crossref","unstructured":"Gao Q, Vogel S (2008) Parallel implementations of word alignment tool. In: Proceedings of the ACL 2008 software engineering, testing, and quality assurance workshop, Columbus, Ohio, USA, pp\u00a049\u201357","DOI":"10.3115\/1622110.1622119"},{"issue":"1","key":"9191_CR17","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s10994-006-6226-1","volume":"63","author":"P Geurts","year":"2006","unstructured":"Geurts P, Ernst D, Wehenkel L (2006) Extremely randomized trees. Mach Learn 63(1):3\u201342","journal-title":"Mach Learn"},{"key":"9191_CR18","unstructured":"Gupta R, Bechara H, Orasan C (2014) Intelligent translation memory matching and retrieval metric exploiting linguistic technology. In: Proceedings of translating and the computer 36, London, UK, pp\u00a086\u201389"},{"key":"9191_CR19","unstructured":"Gupta R, Orasan C, Zampieri M, Vela M, Van\u00a0Genabith J (2015) Can translation memories afford not to use paraphrasing? In: Proceedings of the 18th annual conference of the European association for machine translation, Antalya, Turkey, pp\u00a035\u201342"},{"key":"9191_CR20","doi-asserted-by":"crossref","unstructured":"Khadivi S, Ney H (2005) Automatic filtering of bilingual corpora for statistical machine translation. In: Proceedings of natural language processing and information systems, 10th international conference on applications of natural language to information systems, Alicante, Spain, pp\u00a0263\u2013274","DOI":"10.1007\/11428817_24"},{"key":"9191_CR21","unstructured":"Koehn P, Senellart J (2010) Convergence of translation memory and statistical machine translation. In: Proceedings of AMTA workshop on MT research and the translation industry, Denver, CO, USA, pp\u00a021\u201331"},{"key":"9191_CR22","first-page":"707","volume":"10","author":"VI Levenshtein","year":"1966","unstructured":"Levenshtein VI (1966) Binary codes capable of correcting deletions, insertions, and reversals. Sov Phys Dokl 10:707\u2013710","journal-title":"Sov Phys Dokl"},{"key":"9191_CR23","unstructured":"Lommel A (2015) Multidimensional quality metrics (MQM) definition. Technical report, DFKI, Berlin, Germany"},{"key":"9191_CR24","unstructured":"Lui M, Baldwin T (2012) langid.py: an off-the-shelf language identification tool. In: Proceedings of the ACL 2012 system demonstrations, Jeju Island, Korea, pp\u00a025\u201330"},{"key":"9191_CR25","unstructured":"Ma Y, He Y, Way A, Van Genabith J (2011) Consistent translation using discriminative learning: a translation memory-inspired approach. In: Proceedings of the 49th annual meeting of the association for computational linguistics: human language technologies, volume 1, Portland, Oregon, USA, pp\u00a01239\u20131248"},{"issue":"4","key":"9191_CR26","doi-asserted-by":"crossref","first-page":"417","DOI":"10.1111\/j.1467-9868.2010.00740.x","volume":"72","author":"N Meinshausen","year":"2010","unstructured":"Meinshausen N, B\u00fchlmann P (2010) Stability selection. J R Stat Soc B Stat Methodol 72(4):417\u2013473","journal-title":"J R Stat Soc B Stat Methodol"},{"issue":"4","key":"9191_CR27","doi-asserted-by":"crossref","first-page":"477","DOI":"10.1162\/089120105775299168","volume":"31","author":"DS Munteanu","year":"2005","unstructured":"Munteanu DS, Marcu D (2005) Improving machine translation performance by exploiting non-parallel corpora. Comput Linguist 31(4):477\u2013504","journal-title":"Comput Linguist"},{"key":"9191_CR28","unstructured":"Nakazawa T, Kurohashi S (2011) Bayesian subtree alignment model based on dependency trees. In: Proceedings of 5th international joint conference on natural language processing, Chiang Mai, Thailand, pp\u00a0794\u2013802"},{"key":"9191_CR29","unstructured":"Negri M, Marchetti A, Mehdad Y, Bentivogli L, Giampiccolo D (2012) Semeval-2012 task 8: cross-lingual textual entailment for content synchronization. In: Proceedings of the 6th international workshop on semantic evaluation (SemEval 2012), Montr\u00e9al, Canada, pp\u00a0399\u2013407"},{"key":"9191_CR30","volume-title":"Computer intensive methods for testing hypothesis. An introduction","author":"EW Noreen","year":"1989","unstructured":"Noreen EW (1989) Computer intensive methods for testing hypothesis. An introduction. Wiley, New York"},{"key":"9191_CR31","unstructured":"Rarrick S, Quirk C, Lewis W (2011) MT detection in web-scraped parallel corpora. In: MT summit XIII: the thirteenth machine translation summit, Xiamen, China, pp 422\u2013429"},{"key":"9191_CR32","unstructured":"Riesa J, Marcu D (2012) Automatic parallel fragment extraction from noisy data. In: Proceedings of the 2012 conference of the North American chapter of the association for computational linguistics: human language technologies, Montr\u00e9al, Canada, pp\u00a0538\u2013542"},{"issue":"6","key":"9191_CR33","first-page":"39","volume":"18","author":"R Sikes","year":"2007","unstructured":"Sikes R (2007) Fuzzy matching in theory and practice. Multilingual 18(6):39\u201343","journal-title":"Multilingual"},{"key":"9191_CR34","unstructured":"Snover M, Dorr B, Schwartz R, Micciulla L, Makhoul J (2006) A study of translation edit rate with targeted human annotation. In: AMTA 2006: proceedings of the 7th conference of the association for machine translation in the Americas, visions for the future of machine translation, Cambridge, Massachusetts, USA, pp\u00a0223\u2013231"},{"key":"9191_CR35","doi-asserted-by":"crossref","unstructured":"S\u00f8gaard A, Agi\u0107 V, Mart\u00ednez\u00a0Alonso H, Plank B, Bohnet B, Johannsen A (2015) Inverted indexing for cross-lingual NLP. In: Proceedings of the 53rd annual meeting of the association for computational linguistics and the 7th international joint conference on natural language processing (volume 1: long papers), Beijing, China, pp\u00a01713\u20131722","DOI":"10.3115\/v1\/P15-1165"},{"key":"9191_CR36","unstructured":"Specia L, Cancedda N, Dymetman M, Turchi M, Cristianini N (2009) Estimating the sentence-level quality of machine translation systems. In: Proceedings of the 13th annual conference of the European association for machine translation (EAMT-2009), Barcelona, Spain, pp\u00a028\u201335"},{"key":"9191_CR37","doi-asserted-by":"crossref","unstructured":"Tillmann C (2009) A beam-search extraction algorithm for comparable data. In: Proceedings of the ACL-IJCNLP 2009 conference short papers, Singapore, pp\u00a0225\u2013228","DOI":"10.3115\/1667583.1667653"},{"key":"9191_CR38","unstructured":"Turchi M, Negri M, Federico M (2013) Coping with the subjectivity of human judgements in MT quality estimation. In: Proceedings of the eighth workshop on statistical machine translation, Sofia, Bulgaria, pp\u00a0240\u2013251"},{"issue":"3","key":"9191_CR39","doi-asserted-by":"crossref","first-page":"281","DOI":"10.1007\/s10590-014-9162-z","volume":"28","author":"M Turchi","year":"2014","unstructured":"Turchi M, Negri M, Federico M (2014) Data-driven annotation of binary MT quality estimation corpora based on human post-editions. Mach Transl 28(3):281\u2013308","journal-title":"Mach Transl"},{"key":"9191_CR40","unstructured":"Vanallemeersch T, Vandeghinste V (2014) Improving fuzzy matching through syntactic knowledge. In: Proceedings of translating and the computer 36, London, pp\u00a0217\u2013227"},{"key":"9191_CR41","unstructured":"Vanallemeersch T, Vandeghinste V (2015) Assessing linguistically aware fuzzy matching in translation memories. In: Proceedings of the 18th annual conference of the European association for machine translation, Antalya, Turkey, pp\u00a0153\u2013160"},{"key":"9191_CR42","unstructured":"Wang K, Zong C, Su KY (2013) Integrating translation memory into phrase-based machine translation during decoding. In: Proceedings of the 51st annual meeting of the association for computational linguistics (volume 1: long papers), Sofia, Bulgaria, pp\u00a011\u201321"},{"key":"9191_CR43","doi-asserted-by":"crossref","unstructured":"Yeh A (2000) More accurate tests for the statistical significance of result differences. In: The 18th international conference on computational linguistics, COLING 2000 in Europe, proceedings of the conference, volume 2, Saarbr\u00fccken, Germany, pp\u00a0947\u2013953","DOI":"10.3115\/992730.992783"},{"key":"9191_CR44","unstructured":"Zhechev V, Van Genabith J (2010) Seeding statistical machine translation with translation memory output through tree-based structural alignment. In: Proceedings of the 4th workshop on syntax and structure in statistical translation, Beijing, China, pp\u00a043\u201351"}],"container-title":["Machine Translation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10590-017-9191-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10590-017-9191-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10590-017-9191-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,11,25]],"date-time":"2017-11-25T03:26:06Z","timestamp":1511580366000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10590-017-9191-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2,10]]},"references-count":44,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2017,9]]}},"alternative-id":["9191"],"URL":"https:\/\/doi.org\/10.1007\/s10590-017-9191-5","relation":{},"ISSN":["0922-6567","1573-0573"],"issn-type":[{"type":"print","value":"0922-6567"},{"type":"electronic","value":"1573-0573"}],"subject":[],"published":{"date-parts":[[2017,2,10]]}}}