{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,5]],"date-time":"2025-10-05T19:59:45Z","timestamp":1759694385318},"reference-count":25,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2008,3,15]],"date-time":"2008-03-15T00:00:00Z","timestamp":1205539200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2008,3,15]],"date-time":"2008-03-15T00:00:00Z","timestamp":1205539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Inf Retrieval"],"published-print":{"date-parts":[[2008,10]]},"DOI":"10.1007\/s10791-008-9058-8","type":"journal-article","created":{"date-parts":[[2008,3,14]],"date-time":"2008-03-14T17:18:16Z","timestamp":1205515096000},"page":"427-445","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":31,"title":["Focused web crawling in the acquisition of comparable corpora"],"prefix":"10.1007","volume":"11","author":[{"given":"Tuomas","family":"Talvensaari","sequence":"first","affiliation":[]},{"given":"Ari","family":"Pirkola","sequence":"additional","affiliation":[]},{"given":"Kalervo","family":"J\u00e4rvelin","sequence":"additional","affiliation":[]},{"given":"Martti","family":"Juhola","sequence":"additional","affiliation":[]},{"given":"Jorma","family":"Laurikkala","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2008,3,15]]},"reference":[{"key":"9058_CR1","unstructured":"Allan, J., Callan, J. P., Croft, W. B., Ballesteros, L., Broglio, J., Xu, J., & Shu, H. (1996). Inquery at TREC-5. In TREC-5: The Fifth Text Retrieval Conference (pp. 119\u2013132). National Institute of Standards and Technology."},{"key":"9058_CR2","unstructured":"Bra, P. D., Houben, G.-J., Kornatzky, Y., & Post, R. (1994). Information retrieval in distributed hypertexts. In Proceedings of the 4th RIAO Conference (pp. 481\u2013491)."},{"key":"9058_CR3","doi-asserted-by":"crossref","unstructured":"Braschler, M., & Sch\u00e4uble, P. (1998). Multilingual information retrieval based on document alignment techniques. In ECDL \u201998: Proceedings of the Second European Conference on Research and Advanced Technology for Digital Libraries (pp. 183\u2013197). London: Springer-Verlag.","DOI":"10.1007\/3-540-49653-X_12"},{"key":"9058_CR4","unstructured":"Cavnar, W. B., & Trenkle, J. M. (1994). N-gram-based text categorization. In Proceedings of SDAIR-94, 3rd Annual Symposium on Document Analysis and Information Retrieval (pp. 161\u2013175)."},{"key":"9058_CR5","doi-asserted-by":"crossref","unstructured":"Chakrabarti, S., van den Berg, M., & Dom, B. (1999). Focused crawling: A new approach to topic-specific web resource discovery. In WWW \u201999: Proceeding of the Eighth International Conference on World Wide Web (pp. 1623\u20131640). New York: Elsevier North-Holland, Inc.","DOI":"10.1016\/S1389-1286(99)00052-3"},{"key":"9058_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, P.-J., Teng, J.-W., Chen, R.-C., Wang, J.-H., Lu, W.-H., & Chien, L.-F. (2004). Translating unknown queries with web corpora for cross-language information retrieval. In SIGIR \u201904: Proceedings of the 27th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (pp. 146\u2013153). New York: ACM Press.","DOI":"10.1145\/1008992.1009020"},{"key":"9058_CR7","doi-asserted-by":"crossref","unstructured":"Cho, J., Garcia-Molina, H., & Page, L. (1998). Efficient crawling through URL ordering. In WWW7: Proceedings of the Seventh International Conference on World Wide Web (pp. 161\u2013172). Amsterdam: Elsevier Science Publishers B. V.","DOI":"10.1016\/S0169-7552(98)00108-1"},{"key":"9058_CR8","doi-asserted-by":"crossref","unstructured":"Gale, W. A., & Church, K. W. (1991). A program for aligning sentences in bilingual corpora. In ACL \u201991: Proceedings of the 29th Annual Meeting of the Association for Computational Linguistics (pp. 177\u2013184). Morristown, NJ: Association for Computational Linguistics.","DOI":"10.3115\/981344.981367"},{"key":"9058_CR9","unstructured":"Hassan, A., Fahmy, H., & Hassan, H. (2007). Improving named entity translation by exploiting comparable and parallel corpora. In Proceedings of the 2007 Conference on Recent Advances in Natural Language Processing (RANLP), AMML Workshop."},{"issue":"1","key":"9058_CR10","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1145\/1067268.1067273","volume":"39","author":"W. R. Hersh","year":"2005","unstructured":"Hersh, W. R. (2005). Report on the TREC 2004 genomics track. SIGIR Forum, 39(1), 21\u201324.","journal-title":"SIGIR Forum"},{"key":"9058_CR11","doi-asserted-by":"crossref","unstructured":"Keskustalo, H., Hedlund, T., & Airio, E. (2002). Utaclir \u2013 general query translation framework for several language pairs. In SIGIR \u201902: Proceedings of the 25th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (pp. 448\u2013448). New York: ACM Press.","DOI":"10.1145\/564376.564489"},{"key":"9058_CR12","unstructured":"Lemur homepage. (2008). The Lemur toolkit homepage. http:\/\/www.lemurproject.org\/. Accessed 22 February 2008."},{"key":"9058_CR13","doi-asserted-by":"crossref","unstructured":"Nie, J.-Y., Simard, M., Isabelle, P., & Durand, R. (1999). Cross-language information retrieval based on parallel texts and automatic mining of parallel texts from the web. In SIGIR \u201999: Proceedings of the 22nd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (pp. 74\u201381). New York: ACM Press.","DOI":"10.1145\/312624.312656"},{"key":"9058_CR14","doi-asserted-by":"crossref","unstructured":"Pirkola, A. (1998). The effects of query structure and dictionary setups in dictionary-based cross-language information retrieval. In SIGIR \u201998: Proceedings of the 21st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (pp. 55\u201363). New York: ACM Press.","DOI":"10.1145\/290941.290957"},{"issue":"3\u20134","key":"9058_CR15","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1023\/A:1011994105352","volume":"4","author":"A. Pirkola","year":"2001","unstructured":"Pirkola, A., Hedlund, T., Keskustalo, H., & J\u00e4rvelin, K. (2001). Dictionary-based cross-language information retrieval: Problems, methods, and research findings. Information Retrieval, 4(3\u20134), 209\u2013230.","journal-title":"Information Retrieval"},{"key":"9058_CR16","doi-asserted-by":"crossref","unstructured":"Pirkola, A., Toivonen, J., Keskustalo, H., & J\u00e4rvelin, K. (2006). FITE-TRT: A high quality translation technique for OOV words. In SAC \u201906: Proceedings of the 2006 ACM Symposium on Applied Computing (pp. 1043\u20131049). New York: ACM Press.","DOI":"10.1145\/1141277.1141525"},{"key":"9058_CR17","doi-asserted-by":"crossref","unstructured":"Resnik, P. (1999). Mining the web for bilingual text. In Proceedings of the 37th Annual Meeting of the Association for Computational Linguistics on Computational Linguistics (pp. 527\u2013534). Morristown, NJ: Association for Computational Linguistics.","DOI":"10.3115\/1034678.1034757"},{"key":"9058_CR18","doi-asserted-by":"crossref","unstructured":"Sheridan, P., & Ballerini, J. P. (1996). Experiments in multilingual information retrieval using the SPIDER system. In SIGIR \u201996: Proceedings of the 19th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (pp. 58\u201365). New York: ACM Press.","DOI":"10.1145\/243199.243213"},{"key":"9058_CR19","volume-title":"Nonparametric statistics for the behavioral sciences","author":"S. Siegel","year":"1988","unstructured":"Siegel, S., & Castellan, N. J., Jr. (1988). Nonparametric statistics for the behavioral sciences. New York: McGraw-Hill."},{"key":"9058_CR20","doi-asserted-by":"crossref","unstructured":"Singhal, A., Buckley, C., & Mitra, M. (1996). Pivoted document length normalization. In SIGIR \u201996: Proceedings of the 19th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (pp. 21\u201329). New York: ACM Press.","DOI":"10.1145\/243199.243206"},{"issue":"4","key":"9058_CR21","doi-asserted-by":"publisher","first-page":"257","DOI":"10.2498\/cit.2005.04.01","volume":"13","author":"R. Steinberger","year":"2005","unstructured":"Steinberger, R., Pouliquen, B., & Ignat, C. (2005). Navigating multilingual news collections using automatically extracted information. Journal of Computing and Information Technology, 13(4), 257\u2013264.","journal-title":"Journal of Computing and Information Technology"},{"key":"9058_CR22","unstructured":"Steinberger, R., Pouliquen, B., Widiger, A., Ignat, C., Erjavec, T., Tufi\u015f, D., & Varga, D. (2006). The JRC-Acquis: A multilingual aligned parallel corpus with 20+ languages. In LREC \u20192006: Proceedings of the 5th International Conference on Language Resources and Evaluation."},{"issue":"1","key":"9058_CR23","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1145\/1198296.1198300","volume":"25","author":"T. Talvensaari","year":"2007","unstructured":"Talvensaari, T., Laurikkala, J., J\u00e4rvelin, K., Juhola, M., & Keskustalo, H. (2007). Creating and exploiting a comparable corpus in cross-language information retrieval. ACM Transactions on Information Systems, 25(1), 4.","journal-title":"ACM Transactions on Information Systems"},{"key":"9058_CR24","doi-asserted-by":"crossref","unstructured":"Utsuro, T., Horiuchi, T., Chiba, Y., & Hamamoto, T. (2002). Semi-automatic compilation of bilingual lexicon entries from cross-lingually relevant news articles on WWW news sites. In AMTA \u201902: Proceedings of the 5th Conference of the Association for Machine Translation in the Americas on Machine Translation: From Research to Real Users (pp. 165\u2013176). London: Springer-Verlag.","DOI":"10.1007\/3-540-45820-4_17"},{"issue":"6","key":"9058_CR25","doi-asserted-by":"publisher","first-page":"939","DOI":"10.1016\/j.ipm.2003.11.002","volume":"40","author":"C. C. Yang","year":"2004","unstructured":"Yang, C. C., & Li, K. W. (2004). Building parallel corpora by automatic title alignment using length-based and text-based approaches. Information Processing & Management, 40(6), 939\u2013955.","journal-title":"Information Processing & Management"}],"container-title":["Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-008-9058-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10791-008-9058-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-008-9058-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-008-9058-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,2]],"date-time":"2024-01-02T14:35:54Z","timestamp":1704206154000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10791-008-9058-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008,3,15]]},"references-count":25,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2008,10]]}},"alternative-id":["9058"],"URL":"https:\/\/doi.org\/10.1007\/s10791-008-9058-8","relation":{},"ISSN":["1386-4564","1573-7659"],"issn-type":[{"value":"1386-4564","type":"print"},{"value":"1573-7659","type":"electronic"}],"subject":[],"published":{"date-parts":[[2008,3,15]]},"assertion":[{"value":"4 July 2007","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 February 2008","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 March 2008","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}