{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,5]],"date-time":"2025-10-05T20:00:03Z","timestamp":1759694403604,"version":"3.40.3"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2012,4,27]],"date-time":"2012-04-27T00:00:00Z","timestamp":1335484800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2012,4,27]],"date-time":"2012-04-27T00:00:00Z","timestamp":1335484800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Inf Retrieval"],"published-print":{"date-parts":[[2013,2]]},"DOI":"10.1007\/s10791-012-9194-z","type":"journal-article","created":{"date-parts":[[2012,4,26]],"date-time":"2012-04-26T06:02:29Z","timestamp":1335420149000},"page":"1-29","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Leveraging comparable corpora for cross-lingual information retrieval in resource-lean language pairs"],"prefix":"10.1007","volume":"16","author":[{"given":"Azadeh","family":"Shakery","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"ChengXiang","family":"Zhai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2012,4,27]]},"reference":[{"key":"9194_CR1","doi-asserted-by":"crossref","unstructured":"Abdul-Rauf, S., & Schwenk, H. (2009). Exploiting comparable corpora with TER and TERp. In BUCC \u201809: Proceedings of the 2nd workshop on building and using comparable corpora (pp. 46\u201354).","DOI":"10.3115\/1690339.1690351"},{"key":"9194_CR2","doi-asserted-by":"crossref","unstructured":"Aljlayl, M., & Frieder, O. (2001). Effective Arabic\u2013English cross-language information retrieval via machine-readable dictionaries and machine translation. In CIKM \u201801: Proceedings of the 10th international conference on information and knowledge management (pp. 295\u2013302).","DOI":"10.1145\/502585.502635"},{"key":"9194_CR3","doi-asserted-by":"crossref","unstructured":"Ballesteros, L., & Croft, W. B. (1997). Phrasal translation and query expansion techniques for cross-language information retrieval. In SIGIR \u201897: Proceedings of the 20th annual international ACM SIGIR conference on research and development in information retrieval (pp. 84\u201391).","DOI":"10.1145\/258525.258540"},{"key":"9194_CR4","doi-asserted-by":"crossref","unstructured":"Braschler, M., Ripplinger, B., & Sch\u00e4uble, P. (2002). Experiments with the eurospider retrieval system for CLEF 2001. In CLEF \u201801: Revised papers from the 2nd workshop of the cross-language evaluation forum on evaluation of cross-language information retrieval systems (pp. 102\u2013110).","DOI":"10.1007\/3-540-45691-0_8"},{"key":"9194_CR5","doi-asserted-by":"crossref","unstructured":"Braschler, M., & Sch\u00e4uble, P. (1998). Multilingual information retrieval based on document alignment techniques. In ECDL \u201898: Proceedings of the 2nd European conference on research and advanced technology for digital libraries (pp. 183\u2013197).","DOI":"10.1007\/3-540-49653-X_12"},{"key":"9194_CR6","doi-asserted-by":"crossref","unstructured":"Braschler, M., & Sch\u00e4uble, P. (2001). Experiments with the Eurospider retrieval system for CLEF 2000. In CLEF \u201800: Revised papers from the workshop of cross-language evaluation forum on cross-language information retrieval and evaluation (pp. 140\u2013148).","DOI":"10.1007\/3-540-44645-1_13"},{"issue":"1","key":"9194_CR7","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/S0306-4573(99)00047-3","volume":"36","author":"C. Buckley","year":"2000","unstructured":"Buckley, C., Mitra, M., Walz, J., & Cardie, C. (2000). Using clustering and superconcepts within SMART: TREC 6. Information Processing and Management, 36(1), 109\u2013131.","journal-title":"Information Processing and Management"},{"key":"9194_CR8","doi-asserted-by":"crossref","unstructured":"Cao, G., Gao, J., Nie, J. Y., & Bai, J. (2007). Extending query translation to cross-language query expansion with Markov chain models. In CIKM \u201807: Proceedings of the 16th ACM conference on information and knowledge management (pp. 351\u2013360).","DOI":"10.1145\/1321440.1321491"},{"issue":"1-2","key":"9194_CR9","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1023\/B:INRT.0000009444.89549.90","volume":"7","author":"A. Chen","year":"2004","unstructured":"Chen, A., & Gey, F. C. (2004). Multilingual information retrieval using machine translation, relevance feedback and decompounding. Information Retrieval, 7(1\u20132), 149\u2013182.","journal-title":"Information Retrieval"},{"key":"9194_CR10","unstructured":"Desikan, P. K. (2009). Efficient computation of first-order markov measures on large evolving graphs. Ph.D. thesis, University of Minnesota, Minneapolis, MN, USA."},{"key":"9194_CR11","doi-asserted-by":"crossref","unstructured":"Dimitrova, L., Ide, N., Petkevic, V., Erjavec, T., Kaalep, H. J., & Tufis, D. (1998). MULTEXT-east: Parallel and comparable corpora and lexicons for six central and eastern European languages. In COLING \u201898: Proceedings of the 17th international conference on computational linguistics (Vol. 1, pp. 315\u2013319).","DOI":"10.3115\/980451.980897"},{"issue":"11","key":"9194_CR12","doi-asserted-by":"publisher","first-page":"2266","DOI":"10.1002\/asi.21337","volume":"61","author":"L. Dolamic","year":"2010","unstructured":"Dolamic, L., & Savoy, J. (2010) Retrieval effectiveness of machine translated queries. Journal of the American Society for Information Science and Technology, 61(11), 2266\u20132273.","journal-title":"Journal of the American Society for Information Science and Technology"},{"key":"9194_CR13","doi-asserted-by":"crossref","unstructured":"Franz, M., & McCarley, J. S. (2002). Arabic information retrieval at IBM. In Proceedings of the 11th text retrieval conference (TREC-2002).","DOI":"10.6028\/NIST.SP.500-251.xlingual-ibm-abe"},{"key":"9194_CR14","doi-asserted-by":"crossref","unstructured":"Franz, M., McCarley, J. S., & Roukos, S. (1999). Ad hoc and multilingual information retrieval at IBM. In Proceedings of the 7th text retrieval conference (TREC-7) (pp. 157\u2013168).","DOI":"10.6028\/NIST.SP.500-246.sdr-ibm-franz"},{"key":"9194_CR15","doi-asserted-by":"crossref","unstructured":"Fung, P., & Yee, L. Y. (1998). An IR approach for translating new words from nonparallel, comparable texts. In COLING \u201898: Proceedings of the 17th international conference on computational linguistics (pp. 414\u2013420).","DOI":"10.3115\/980451.980916"},{"key":"9194_CR16","unstructured":"Gey, F. C. (2004). Chinese and Korean topic search of Japanese news collections. In Working notes of the fourth NTCIR workshop on Asian language retrieval and question answering (pp. 214\u2013218)."},{"key":"9194_CR17","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780198572237.001.0001","volume-title":"Probability and random processes","author":"G. Grimmett","year":"2001","unstructured":"Grimmett, G., & Stirzaker, D. (2001). Probability and random processes (3rd ed.). Oxford: Oxford University Press.","edition":"3"},{"key":"9194_CR18","unstructured":"Haveliwala, T. (1999). Efficient computation of PageRank. Technical report 1999-31, Stanford InfoLab."},{"issue":"3","key":"9194_CR19","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1145\/979872.979876","volume":"2","author":"D. He","year":"2003","unstructured":"He, D., Oard, D. W., Wang, J., Luo, J., Demner-Fushman, D., Darwish, K., et al. (2003). Making MIRACLEs: Interactive translingual search for Cebuano and Hindi. ACM Transactions on Asian Language Information Processing (TALIP), 2(3), 219\u2013244.","journal-title":"ACM Transactions on Asian Language Information Processing (TALIP)"},{"issue":"1-2","key":"9194_CR20","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1023\/B:INRT.0000009442.34054.55","volume":"7","author":"T. Hedlund","year":"2004","unstructured":"Hedlund, T., Airio, E., Keskustalo, H., Lehtokangas, R., Pirkola, A., & J\u00e4rvelin, K. (2004). Dictionary-based cross-language information retrieval: Learning experiences from CLEF 2000\u20132002. Information Retrieval, 7(1-2), 99\u2013119.","journal-title":"Information Retrieval"},{"key":"9194_CR21","doi-asserted-by":"crossref","unstructured":"Hiemstra, D., Kraaij, W., Pohlmann, R., & Westerveld, T. (2001). Translation resources, merging strategies, and relevance feedback for cross-language information retrieval. In CLEF \u201800: Revised papers from the workshop of cross-language evaluation forum on cross-language information retrieval and evaluation (pp. 102\u2013115).","DOI":"10.1007\/3-540-44645-1_10"},{"key":"9194_CR22","doi-asserted-by":"crossref","unstructured":"Hull, D., & Grefenstette, G. (1996). Querying across languages: a dictionary-based approach to multilingual information retrieval. In SIGIR \u201896: Proceedings of the 19th annual international ACM SIGIR conference on research and development in information retrieval (pp. 49\u201357).","DOI":"10.1145\/243199.243212"},{"key":"9194_CR23","doi-asserted-by":"crossref","unstructured":"Kwok, K. L. (1999). English\u2013Chinese cross-language retrieval based on a translation package. In Workshop of machine translation for cross language information retrieval, machine translation summit VII (pp. 8\u201313).","DOI":"10.1007\/3-540-39965-8_2"},{"issue":"3","key":"9194_CR24","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1016\/j.ipm.2004.06.012","volume":"41","author":"G. A. Levow","year":"2005","unstructured":"Levow, G. A., Oard, D. W., & Resnik, P. (2005). Dictionary-based techniques for cross-language information retrieval. Information Processing and Management, 41(3), 523\u2013547.","journal-title":"Information Processing and Management"},{"key":"9194_CR25","doi-asserted-by":"crossref","unstructured":"Masuichi, H., Flournoy, R., Kaufmann, S., Peters, S. (2000). A bootstrapping method for extracting bilingual text pairs. In COLING \u201800: Proceedings of the 18th conference on computational linguistics (pp. 1066\u20131070).","DOI":"10.3115\/992730.992806"},{"issue":"4","key":"9194_CR26","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1162\/089120105775299168","volume":"31","author":"D. S. Munteanu","year":"2005","unstructured":"Munteanu, D. S., & Marcu, D. (2005). Improving machine translation performance by exploiting non-parallel corpora. Computational Linguistics, 31(4), 477\u2013504.","journal-title":"Computational Linguistics"},{"key":"9194_CR27","doi-asserted-by":"crossref","unstructured":"Nie, J. Y., & Simard, M. (2002) Using statistical translation models for bilingual IR. In CLEF \u201801: Revised papers from the second workshop of the cross-language evaluation forum on evaluation of cross-language information retrieval systems (pp. 137\u2013150).","DOI":"10.1007\/3-540-45691-0_11"},{"key":"9194_CR28","doi-asserted-by":"crossref","unstructured":"Nie, J. Y., Simard, M., Isabelle, P., & Durand, R. (1999). Cross-language information retrieval based on parallel texts and automatic mining of parallel texts from the web. In SIGIR \u201899: Proceedings of the 22nd annual international ACM sigir conference on research and development in information retrieval (pp. 74\u201381).","DOI":"10.1145\/312624.312656"},{"key":"9194_CR29","first-page":"223","volume":"33","author":"D. W. Oard","year":"1998","unstructured":"Oard, D. W., & Diekema, A. R. (1998). Cross-language information retrieval. Annual Review of Information Science and Technology (ARIST), 33, 223\u2013256.","journal-title":"Annual Review of Information Science and Technology (ARIST)"},{"key":"9194_CR30","doi-asserted-by":"crossref","unstructured":"Oard, D. W., & Gey, F. C. (2002). The TREC 2002 Arabic\/English CLIR track. In Proceedings of the 11th text retrieval conference (TREC-2002) (pp. 17\u201326).","DOI":"10.6028\/NIST.SP.500-251.xlingual-overview"},{"key":"9194_CR31","doi-asserted-by":"crossref","unstructured":"Oard, D. W., & Hackett, P. (1997). Document translation for cross-language text retrieval at the University of Maryland. In Proceedings of the 6th text retrieval conference (TREC-6) (pp. 687\u2013696).","DOI":"10.6028\/NIST.SP.500-240.clir-UMd"},{"key":"9194_CR32","unstructured":"Page, L., Brin, S., Motwani, R., & Winograd, T. (1999). The PageRank citation ranking: Bringing order to the web. Technical report 1999-66, Stanford InfoLab."},{"key":"9194_CR33","unstructured":"Picchi, E., & Peters, C. (1996). Cross language information retrieval: A system for comparable corpus querying. In Workshop on cross-linguistic information retrieval, SIGIR \u201896 (pp. 24\u201333)."},{"key":"9194_CR34","doi-asserted-by":"crossref","unstructured":"Rapp, R. (1995). Identifying word translations in non-parallel texts. In ACL \u201895: Proceedings of the 33rd annual meeting on association for computational linguisticss (pp. 320\u2013322).","DOI":"10.3115\/981658.981709"},{"key":"9194_CR35","doi-asserted-by":"crossref","unstructured":"Sadat, F., Yoshikawa, M., Uemura, S. (2003a). Bilingual terminology acquisition from comparable corpora and phrasal translation to cross-language information retrieval. In ACL \u201803: Proceedings of the 41st annual meeting on association for computational linguistics (pp. 141\u2013144).","DOI":"10.3115\/1075178.1075201"},{"key":"9194_CR36","doi-asserted-by":"crossref","unstructured":"Sadat, F., Yoshikawa, M., & Uemura, S. (2003b). Learning bilingual translations from comparable corpora to cross-language information retrieval: hybrid statistics-based and linguistics-based approach. In IRAL \u201803: Proceedings of the 6th international workshop on information retrieval with asian languages (pp. 57\u201364).","DOI":"10.3115\/1118935.1118943"},{"issue":"2","key":"9194_CR37","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1145\/1105696.1105701","volume":"4","author":"J. Savoy","year":"2005","unstructured":"Savoy, J. (2005). Comparative study of monolingual and multilingual search models for use with Asian languages. ACM transactions on asian language information processing (TALIP), 4(2), 163\u2013189.","journal-title":"ACM Transactions on Asian Language Information Processing (TALIP)"},{"key":"9194_CR38","doi-asserted-by":"crossref","unstructured":"Savoy, J., & Rasolofo, Y. (2002). Report on the TREC 11 experiment: Arabic, named page and topic distillation searches. In proceedings of the 11th text retrieval conference (TREC-2002) (pp. 765\u2013774).","DOI":"10.6028\/NIST.SP.500-251.xlingual-Neuchatel"},{"key":"9194_CR39","doi-asserted-by":"crossref","unstructured":"Shakery, A., & Zhai, C. (2006). A probabilistic relevance propagation model for hypertext retrieval. In CIKM \u201806: Proceedings of the 15th ACM international conference on information and knowledge managements (pp. 550\u2013558).","DOI":"10.1145\/1183614.1183693"},{"key":"9194_CR41","doi-asserted-by":"crossref","unstructured":"Sheridan, P., & Ballerini, J. P. (1996). Experiments in multilingual information retrieval using the SPIDER system. In SIGIR \u201896: Proceedings of the 19th annual international ACM SIGIR conference on research and development in information retrieval (pp. 58\u201365).","DOI":"10.1145\/243199.243213"},{"key":"9194_CR40","volume-title":"Building a large multilingual test collection from comparable news documents, chap. 11","author":"P. Sheridan","year":"1998","unstructured":"Sheridan, P., Ballerini, J., & Schauble, P. (1998). Building a large multilingual test collection from comparable news documents (Chap. 11). Boston, MA: Kluwer."},{"issue":"4","key":"9194_CR42","doi-asserted-by":"publisher","first-page":"257","DOI":"10.2498\/cit.2005.04.01","volume":"13","author":"R. Steinberger","year":"2005","unstructured":"Steinberger, R., Pouliquen, B., & Ignat, C. (2005). Navigating multilingual news collections using automatically extracted information. Computing and Information Technology, 13(4), 257\u2013264.","journal-title":"Computing and Information Technology"},{"key":"9194_CR43","doi-asserted-by":"publisher","unstructured":"Talvensaari, T., Laurikkala, J., J\u00e4rvelin, K., Juhola, M., & Keskustalo, H. (2007). Creating and exploiting a comparable corpus in cross-language information retrieval. ACM Transactions on Information Systems, 25(1), Article 4. doi:10.1145\/1198296.1198300.","DOI":"10.1145\/1198296.1198300"},{"issue":"5","key":"9194_CR44","doi-asserted-by":"publisher","first-page":"427","DOI":"10.1007\/s10791-008-9058-8","volume":"11","author":"T. Talvensaari","year":"2008","unstructured":"Talvensaari, T., Pirkola, A., J\u00e4rvelin, K., Juhola, M., & Laurikkala, J. (2008). Focused web crawling in the acquisition of comparable corpora. Information Retrieval, 11(5), 427\u2013445.","journal-title":"Information Retrieval"},{"key":"9194_CR45","doi-asserted-by":"crossref","unstructured":"Tao, T., & Zhai, C. (2005). Mining comparable bilingual text corpora for cross-language information integration. In KDD \u201805: Proceedings of the 11th ACM SIGKDD international conference on knowledge discovery and data mining (pp. 691\u2013696).","DOI":"10.1145\/1081870.1081958"},{"key":"9194_CR46","doi-asserted-by":"crossref","unstructured":"Tomlinson, S. (2002). Experiments in named page finding and Arabic retrieval with Hummingbird SearchServer\u2122 at TREC 2002. In Proceedings of the 11th Text retrieval conference (TREC-2002) (pp. 248\u2013259).","DOI":"10.6028\/NIST.SP.500-251.xlingual-hummingbird"},{"key":"9194_CR47","doi-asserted-by":"crossref","unstructured":"Utsuro, T., Horiuchi, T., Chiba, Y., & Hamamoto, T. (2002). Semi-automatic compilation of bilingual lexicon entries from cross-lingually relevant news articles on WWW news sites. In AMTA \u201802: Proceedings of the 5th conference of the association for machine translation in the Americas on machine translation: From research to real users (pp. 165\u2013176).","DOI":"10.1007\/3-540-45820-4_17"},{"key":"9194_CR48","doi-asserted-by":"crossref","unstructured":"Vu, T., Aw, A. T., & Zhang, M. (2009). Feature-based method for document alignment in comparable news corpora. In EACL \u201809: Proceedings of the 12th conference of the European chapter of the association for computational linguistics (pp. 843\u2013851).","DOI":"10.3115\/1609067.1609161"},{"key":"9194_CR49","doi-asserted-by":"crossref","unstructured":"Xu, J., & Weischedel, R. (2000). Cross-lingual information retrieval using hidden Markov models. In Proceedings of the 2000 Joint SIGDAT conference on empirical methods in natural language processing and very large corpora: Held in conjunction with the 38th annual meeting of the association for computational linguistics (Vol. 13, pp. 95\u2013103).","DOI":"10.3115\/1117794.1117806"},{"key":"9194_CR50","doi-asserted-by":"crossref","unstructured":"Yu, K., & Tsujii, J. (2009). Extracting bilingual dictionary from comparable corpora with dependency heterogeneity. In HLT-NAACL \u201809: Proceedings of human language technologies: The 2009 annual conference of the North American chapter of the association for computational linguistics, companion volume: Short papers (pp. 121\u2013124).","DOI":"10.3115\/1620853.1620888"},{"issue":"4","key":"9194_CR51","doi-asserted-by":"publisher","first-page":"616","DOI":"10.7202\/004638ar","volume":"43","author":"F. Zanettin","year":"1998","unstructured":"Zanettin, F. (1998). Bilingual comparable corpora and the training of translators. META, 43(4), 616\u2013630.","journal-title":"META"},{"key":"9194_CR52","doi-asserted-by":"crossref","unstructured":"Zhai, C., & Lafferty, J. (2001). Model-based feedback in the language modeling approach to information retrieval. In CIKM \u201801: Proceedings of the 10th international conference on information and knowledge management (pp. 403\u2013410).","DOI":"10.1145\/502585.502654"},{"issue":"2","key":"9194_CR53","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1145\/984321.984322","volume":"22","author":"C. Zhai","year":"2004","unstructured":"Zhai, C., & Lafferty, J. (2004). A study of smoothing methods for language models applied to information retrieval. ACM Transactions on Information Systems, 22(2), 179\u2013214.","journal-title":"ACM Transactions on Information Systems"}],"container-title":["Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-012-9194-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10791-012-9194-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-012-9194-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-012-9194-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T18:35:55Z","timestamp":1743014155000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10791-012-9194-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,4,27]]},"references-count":53,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2013,2]]}},"alternative-id":["9194"],"URL":"https:\/\/doi.org\/10.1007\/s10791-012-9194-z","relation":{},"ISSN":["1386-4564","1573-7659"],"issn-type":[{"type":"print","value":"1386-4564"},{"type":"electronic","value":"1573-7659"}],"subject":[],"published":{"date-parts":[[2012,4,27]]},"assertion":[{"value":"20 January 2011","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 February 2012","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 April 2012","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}