{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T14:40:03Z","timestamp":1746196803332,"version":"3.40.4"},"publisher-location":"Berlin, Heidelberg","reference-count":126,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642453571"},{"type":"electronic","value":"9783642453588"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-642-45358-8_10","type":"book-chapter","created":{"date-parts":[[2014,4,22]],"date-time":"2014-04-22T10:16:03Z","timestamp":1398161763000},"page":"299-334","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Information Retrieval"],"prefix":"10.1007","author":[{"given":"Kareem","family":"Darwish","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,3,25]]},"reference":[{"key":"10_CR1","unstructured":"Abdelsapor A, Adly N, Darwish K, Emam O, Magdy W, Nagi M (2006) Building a heterogeneous information retrieval collection of printed Arabic documents. In: LREC 2006, Genoa"},{"key":"10_CR2","volume-title":"An-Nahw Ashamil","author":"A Abdul-Al-Aal","year":"1987","unstructured":"Abdul-Al-Aal A (1987) An-Nahw Ashamil. Maktabat Annahda Al-Masriya, Cairo"},{"key":"10_CR3","doi-asserted-by":"crossref","unstructured":"AbdulJaleel N, Larkey LS (2003) Statistical transliteration for English\u2013Arabic cross language information retrieval. In: CIKM\u201903, New Orleans, 3\u20138 Nov 2003","DOI":"10.1145\/956863.956890"},{"issue":"6","key":"10_CR4","doi-asserted-by":"publisher","first-page":"524","DOI":"10.1002\/(SICI)1097-4571(1999)50:6<524::AID-ASI7>3.0.CO;2-M","volume":"50","author":"H Abu-Salem","year":"1999","unstructured":"Abu-Salem H, Al-Omari M, Evens M (1999) Stemming methodologies over individual query words for Arabic information retrieval. JASIS 50(6):524\u2013529","journal-title":"JASIS"},{"key":"10_CR5","volume-title":"A large-scale computational processor of the Arabic morphology, and applications","author":"M Ahmed","year":"2000","unstructured":"Ahmed M (2000) A large-scale computational processor of the Arabic morphology, and applications. Faculty of Engineering, Cairo University, Cairo"},{"key":"10_CR6","doi-asserted-by":"crossref","unstructured":"Ahmad F, Kondrak G (2005) Learning a spelling error model from search query logs. In: Proceedings of HLT-2005, Vancouver","DOI":"10.3115\/1220575.1220695"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Agirre E, Gojenola K, Sarasola K, Voutilainen A (1998) Towards a single proposal in spelling correction. In: Proceedings of COLING-ACL\u201998, San Francisco, pp\u00a022\u201328","DOI":"10.3115\/980451.980850"},{"key":"10_CR8","unstructured":"Alemayehu N (1999) Development of a stemming algorithm for Amharic language text retrieval. Ph.D. thesis, Dept. of Information Studies, University of Sheffield, Sheffield"},{"issue":"4","key":"10_CR9","first-page":"254","volume":"37","author":"N Alemayehu","year":"2003","unstructured":"Alemayehu N, Willett P (2003) The effectiveness of stemming for information retrieval in Amharic. Electron Libr Inf Syst 37(4):254\u2013259","journal-title":"Electron Libr Inf Syst"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Aljlayl M, Frieder O (2002) On Arabic search: improving the retrieval effectiveness via a light stemming approach. In: CIKM\u201902, McLean","DOI":"10.1145\/584792.584848"},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Aljlayl M, Beitzel S, Jensen E, Chowdhury A, Holmes D, Lee M, Grossman D, Frieder O (2001) IIT at TREC-10. In: TREC 2001, Gaithersburg","DOI":"10.6028\/NIST.SP.500-250.xlingual-IIT"},{"issue":"8","key":"10_CR12","doi-asserted-by":"publisher","first-page":"548","DOI":"10.1002\/(SICI)1097-4571(199409)45:8<548::AID-ASI3>3.0.CO;2-X","volume":"45","author":"I Al-Kharashi","year":"1994","unstructured":"Al-Kharashi I, Evens M (1994) Comparing words, stems, and roots as index terms in an Arabic information retrieval system. JASIS 45(8):548\u2013560","journal-title":"JASIS"},{"key":"10_CR13","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1117\/12.205825","volume":"2422","author":"M Allam","year":"1995","unstructured":"Allam M (1995) Segmentation versus segmentation-free for recognizing Arabic text. Proc SPIE 2422:228\u2013235","journal-title":"Proc SPIE"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Argaw AA, Asker L (2007) An Amharic stemmer: reducing words to their citation forms. In: Proceedings of the 5th workshop on important unresolved matters, ACL-2007, Prague, pp\u00a0104\u2013110","DOI":"10.3115\/1654576.1654594"},{"issue":"1","key":"10_CR15","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1145\/322047.322052","volume":"25","author":"R Attar","year":"1978","unstructured":"Attar R, Choueka Y, Dershowitz N, Fraenkel AS (1978) KEDMA \u2013 linguistic tools for retrieval systems. J Assoc Comput Mach 25(1):52\u201366","journal-title":"J Assoc Comput Mach"},{"key":"10_CR16","first-page":"38","volume-title":"Document image defect models","author":"H Baird","year":"1990","unstructured":"Baird H (1990) Document image defect models. In: IAPR workshop on syntactic and structural pattern recognition, Murray Hill, pp\u00a038\u201346"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Baird H (1993) Document image defects models and their uses. In: Second international conference on document analysis and recognition (ICDAR), Tsukuba City, pp\u00a062\u201367","DOI":"10.1109\/ICDAR.1993.395781"},{"key":"10_CR18","doi-asserted-by":"crossref","unstructured":"Beesley K (1996) Arabic finite-state morphological analysis and generation. In: COLING-96, Copenhagen","DOI":"10.3115\/992628.992647"},{"key":"10_CR19","volume-title":"Two-level finite-state analysis of Arabic morphology","author":"K Beesley","year":"1989","unstructured":"Beesley K, Buckwalter T, Newton S (1989) Two-level finite-state analysis of Arabic morphology. In: Proceedings of the seminar on bilingual computing in Arabic and English, Cambridge"},{"issue":"4\u20135","key":"10_CR20","first-page":"993","volume":"3","author":"D Blei","year":"2003","unstructured":"Blei D, Ng A, Jordan M (2003) Latent Dirichlet allocation. J Mach Learn Res 3(4\u20135):993\u20131022","journal-title":"J Mach Learn Res"},{"issue":"3\u20134","key":"10_CR21","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1023\/B:INRT.0000011208.60754.a1","volume":"7","author":"M Braschler","year":"2004","unstructured":"Braschler M, Ripplinger B (2004) How effective is stemming and decompounding for German text retrieval? Inf Retr J 7(3\u20134):291\u2013316","journal-title":"Inf Retr J"},{"key":"10_CR22","doi-asserted-by":"crossref","unstructured":"Brill E, Moore R (2000) An improved error model for noisy channel spelling correction. In: Proceedings of the 38th annual meeting of the association for computational linguistics, ACL\u201900, Hong Kong, pp\u00a0286\u2013293","DOI":"10.3115\/1075218.1075255"},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Burges C, Shaked T, Renshaw E, Lazier A, Deeds M, Hamilton N, Hullender G (2005) Learning to rank using gradient descent. In: Proceedings of the 22nd international conference on machine learning, Bonn","DOI":"10.1145\/1102351.1102363"},{"issue":"5","key":"10_CR24","doi-asserted-by":"publisher","first-page":"619","DOI":"10.1016\/0306-4573(92)90031-T","volume":"28","author":"B Burgin","year":"1992","unstructured":"Burgin B (1992) Variations in relevance judgments and the evaluation of retrieval performance. Inf Process Manage 28(5):619\u2013627","journal-title":"Inf Process Manage"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Carmel D, Maarek YS (1999) Morphological disambiguation for Hebrew search systems. In: NGITS-99, Zikhron-Yaakov","DOI":"10.1007\/3-540-48521-X_24"},{"key":"10_CR26","doi-asserted-by":"crossref","unstructured":"Chenm A, Gey F (2002) Building an Arabic stemmer for information retrieval. In: TREC-2002, Gaithersburg","DOI":"10.6028\/NIST.SP.500-251.xlingual-berkeley"},{"key":"10_CR27","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1007\/BF02403764","volume":"14","author":"Y Choueka","year":"1980","unstructured":"Choueka Y (1980) Computerized full-text retrieval systems and research in the humanities: the Responsa project. Comput Hum 14:153\u2013169. North-Holland","journal-title":"Comput Hum"},{"key":"10_CR28","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1007\/BF01889984","volume":"1","author":"K Church","year":"1991","unstructured":"Church K, Gale W (1991) Probability scoring for spelling correction. Stat Comput 1:93\u2013103","journal-title":"Stat Comput"},{"key":"10_CR29","unstructured":"Croft WB, Harding S, Taghva K, Andborsak J (1994) An evaluation of information retrieval accuracy with simulated OCR output. In: Proceedings of the 3rd annual symposium on document analysis and information retrieval, University of Nevada, Las Vegas, pp\u00a0115\u2013126"},{"key":"10_CR30","volume-title":"Building a shallow morphological analyzer in one day","author":"K Darwish","year":"2002","unstructured":"Darwish K (2002) Building a shallow morphological analyzer in one day. In: ACL workshop on computational approaches to Semitic languages, Philadelphia"},{"key":"10_CR31","unstructured":"Darwish K (2003) Probabilistic methods for searching OCR-degraded Arabic text. Ph.D. thesis, Electrical and Computer Engineering Department, University of Maryland, College Park"},{"key":"10_CR32","unstructured":"Darwish K, Ali A (2012) Arabic retrieval revisited: morphological hole filling. In: Proceedings of the 50th annual meeting of the Association for Computational Linguistics: short papers-volume 2, Jeju Island. ACL, pp 218\u2013222"},{"key":"10_CR33","unstructured":"Darwish K, Emam O (2005) The effect of blind relevance feedback on a new Arabic OCR degraded text collection. In: International conference on machine intelligence: special session on Arabic document image analysis, Tozeur, 5\u20137 Nov 2005"},{"issue":"1","key":"10_CR34","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1145\/1292591.1292596","volume":"26","author":"K Darwish","year":"2007","unstructured":"Darwish K, Magdy W (2007) Error correction vs. query garbling for Arabic OCR document retrieval. ACM Trans Inf Syst (TOIS) 26(1):5","journal-title":"ACM Trans Inf Syst (TOIS)"},{"key":"10_CR35","doi-asserted-by":"crossref","unstructured":"Darwish K, Oard DW (2002) Term selection for searching printed Arabic. In: Proceedings of the 25th annual international ACM SIGIR conference on research and development in information retrieval (SIGIR\u201902), Tampere, pp\u00a0261\u2013268","DOI":"10.1145\/564376.564423"},{"key":"10_CR36","doi-asserted-by":"crossref","unstructured":"Darwish K, Oard D (2002) CLIR experiments at Maryland for TREC 2002: evidence combination for Arabic\u2013English retrieval. In: Text retrieval conference (TREC\u201902), Gaithersburg","DOI":"10.21236\/ADA452814"},{"key":"10_CR37","first-page":"25","volume-title":"Examining the effect of improved context sensitive morphology on Arabic information retrieval","author":"K Darwish","year":"2005","unstructured":"Darwish K, Hassan H, Emam O (2005) Examining the effect of improved context sensitive morphology on Arabic information retrieval. In: Proceedings of the ACL workshop on computational approaches to Semitic languages, Ann Arbor, pp\u00a025\u201330"},{"key":"10_CR38","doi-asserted-by":"crossref","unstructured":"De Roeck A, El-Fares W (2000) A morphologically sensitive clustering algorithm for identifying Arabic roots. In: 38th Annual meeting of the ACL, Hong Kong, pp\u00a0199\u2013206","DOI":"10.3115\/1075218.1075244"},{"key":"10_CR39","unstructured":"Diab M (2009) Second generation tools (AMIRA 2.0): fast and robust tokenization, POS tagging, and Base phrase chunking. In: 2nd international conference on Arabic language resources and tools, Cairo"},{"issue":"3","key":"10_CR40","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1006\/cviu.1998.0692","volume":"70","author":"D Doermann","year":"1998","unstructured":"Doermann D (1998) The indexing and retrieval of document images: a survey. Comput Vis Image Underst 70(3):287\u2013298","journal-title":"Comput Vis Image Underst"},{"key":"10_CR41","first-page":"449","volume-title":"Generating synthetic data for text analysis systems","author":"D Doermann","year":"1995","unstructured":"Doermann D, Yao S (1995) Generating synthetic data for text analysis systems. In: Symposium on document analysis and information retrieval, Las Vegas, pp\u00a0449\u2013467"},{"key":"10_CR42","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1080\/09296179408590017","volume":"1","author":"R Domeij","year":"1994","unstructured":"Domeij R, Hollman J, Kann V (1994) Detection of spelling errors in Swedish not using a Word List en Clair. J Quant Linguist 1:195\u2013201","journal-title":"J Quant Linguist"},{"key":"10_CR43","doi-asserted-by":"crossref","unstructured":"Dumais ST, Furnas GW, Landauer TK, Deerwester S, Harshman R (1988) Using latent semantic analysis to improve access to textual information. In: CHI\u201988 proceedings of the SIGCHI conference on human factors in computing systems, Washington, DC","DOI":"10.1145\/57167.57214"},{"key":"10_CR44","unstructured":"El-Kholy A, Habash N (2010) Techniques for Arabic morphological detokenization and orthographic denormalization. In: Proceedings of language resources and evaluation conference (LREC), Valletta"},{"key":"10_CR45","doi-asserted-by":"crossref","unstructured":"Fraser A, Xu J, Weischedel R (2002) TREC 2002 cross-lingual retrieval at BBN. In: TREC-2002, Gaithersburg","DOI":"10.6028\/NIST.SP.500-251.xlingual-BBN"},{"key":"10_CR46","doi-asserted-by":"crossref","unstructured":"Gao W, Niu C, Nie J-Y, Zhou M, J Hu, Wong K-F, Hon H-W (2007) Cross-lingual query suggestion using query logs of different languages, SIGIR-2007, Amsterdam, pp\u00a0463\u2013470","DOI":"10.1145\/1277741.1277821"},{"key":"10_CR47","first-page":"114","volume":"2009","author":"W Gao","year":"2009","unstructured":"Gao W, Niu C, Zhou M, Wong KF (2009) Joint ranking for multilingual web search. In: ECIR 2009, pp\u00a0114\u2013125","journal-title":"ECIR"},{"key":"10_CR48","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1740592.1740594","volume":"28","author":"W Gao","year":"2010","unstructured":"Gao W, Niu C, Nie J-Y, Zhou M, Wong K-F, Hon H-W (2010) Exploiting query logs for cross-lingual query suggestions. ACM Trans Inf Syst 28:1\u201333","journal-title":"ACM Trans Inf Syst"},{"key":"10_CR49","unstructured":"Gey F, Oard D (2011) The TREC-2001 cross-language information retrieval track: searching Arabic using English, French or Arabic queries. In: TREC 2001, Gaithersburg, pp\u00a016\u201323"},{"key":"10_CR50","volume-title":"Arabic text recognition system","author":"A Gillies","year":"1997","unstructured":"Gillies A, Erlandson E, Trenkle J, Schlosser S (1997) Arabic text recognition system. In: The symposium on document image understanding technology, Annapolis"},{"key":"10_CR51","doi-asserted-by":"crossref","unstructured":"Habash N, Rambow O (2007) Arabic diacritization through full morphological tagging. In: Proceedings of NAACL HLT 2007, Rochester, Companion volume, pp\u00a053\u201356","DOI":"10.3115\/1614108.1614122"},{"key":"10_CR52","unstructured":"Han B, Baldwin T (2011) Lexical normalisation of short text messages: makn sens a #twitter. In: Proceedings of the 49th annual meeting of the Association for Computational Linguistics: human language technologies-volume 1, Portland. ACL, pp 368\u2013378"},{"key":"10_CR53","first-page":"345","volume-title":"Probabilistic retrieval of OCR-degraded text using N-grams","author":"S Harding","year":"1997","unstructured":"Harding S, Croft W, Weir C (1997) Probabilistic retrieval of OCR-degraded text using N-grams. In: European conference on digital libraries, Pisa. Research and advanced technology for digital libraries. Springer, Berlin\/Heidelberg, pp 345\u2013359"},{"key":"10_CR54","doi-asserted-by":"crossref","unstructured":"Harman D (1992) Overview of the first Text REtrieval conference, Gaithersburg, TREC-1992","DOI":"10.6028\/NIST.SP.500-207"},{"key":"10_CR55","first-page":"1","volume":"TREC-4","author":"D Harman","year":"1995","unstructured":"Harman D (1995) Overview of the fourth Text REtrieval conference, Gaithersburg,TREC-4, p\u00a01","journal-title":"Gaithersburg"},{"key":"10_CR56","unstructured":"Hassibi K (1994) Machine printed Arabic OCR. In: 22nd AIPR workshop: interdisciplinary computer vision, SPIE Proceedings, Washington, DC"},{"key":"10_CR57","unstructured":"Hassibi K (1994) Machine printed Arabic OCR using neural networks. In: 4th international conference on multi-lingual computing, London"},{"key":"10_CR58","unstructured":"Hawking D (1996) Document retrieval in OCR-scanned text. In: 6th parallel computing workshop, Kawasaki"},{"issue":"3","key":"10_CR59","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1145\/979872.979876","volume":"2","author":"D He","year":"2003","unstructured":"He D, Oard DW, Wang J, Luo J, Demner-Fushman D, Darwish K, Resnik P, Khudanpur S, Nossal M, Subotin M, Leuski A (2003) Making MIRACLEs: interactive translingual search for Cebuano and Hindi. ACM Trans Asian Lang Inf Process (TALIP) 2(3):219\u2013244","journal-title":"ACM Trans Asian Lang Inf Process (TALIP)"},{"key":"10_CR60","doi-asserted-by":"crossref","unstructured":"Hefny A, Darwish K, Alkahky A (2011) Is a query worth translating: ask the users! In: ECIR 2011, Dublin, pp\u00a0238\u2013250","DOI":"10.1007\/978-3-642-20161-5_24"},{"key":"10_CR61","doi-asserted-by":"crossref","unstructured":"Hersh WR, Bhuptiraju RT, Ross L, Cohen AM, Kraemer DF, Johnson P (2004) TREC 2004 genomics track overview (TREC-2004), Gaithersburg","DOI":"10.1145\/1067268.1067273"},{"issue":"10","key":"10_CR62","doi-asserted-by":"publisher","first-page":"867","DOI":"10.1002\/(SICI)1097-4571(199710)48:10<867::AID-ASI3>3.0.CO;2-#","volume":"48","author":"I Hmeidi","year":"1997","unstructured":"Hmeidi I, Kanaan G, Evens M (1997) Design and implementation of automatic indexing for information retrieval with Arabic documents. JASIS 48(10):867\u2013881","journal-title":"JASIS"},{"key":"10_CR63","unstructured":"Hong T (1995) Degraded text recognition using visual and linguistic context. Ph.D. thesis, Computer Science Department, SUNY Buffalo, Buffalo"},{"key":"10_CR64","doi-asserted-by":"crossref","unstructured":"Huang J, Efthimiadis EN (2009) Analyzing and evaluating query reformulation strategies in web search logs. In: CIKM\u201909, Hong Kong, 2\u20136 Nov 2009","DOI":"10.1145\/1645953.1645966"},{"issue":"4","key":"10_CR65","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1145\/582415.582418","volume":"20","author":"K Jarvelin","year":"2002","unstructured":"Jarvelin K, Kekalainen J (2002) Cumulated gain-based evaluation of IR techniques. ACM Trans Inf Syst 20(4):422\u2013446","journal-title":"ACM Trans Inf Syst"},{"key":"10_CR66","doi-asserted-by":"crossref","unstructured":"Joachims T (2006) Training linear SVMs in linear time. In: Proceedings of the ACM conference on knowledge discovery and data mining (KDD), Philadelphia","DOI":"10.1145\/1150402.1150429"},{"key":"10_CR67","volume-title":"Speech and language processing","author":"D Jurafsky","year":"2000","unstructured":"Jurafsky D, Martin J (2000) Speech and language processing. Prentice Hall, Upper Saddle River"},{"key":"10_CR68","doi-asserted-by":"crossref","unstructured":"Kantor P, Voorhees E (1996) Report on the TREC-5 confusion track. In: TREC-1996, Gaithersburg","DOI":"10.6028\/NIST.SP.500-238.confusion-overview"},{"key":"10_CR69","doi-asserted-by":"crossref","unstructured":"Kareem Darwish (2013) Arabizi detection and conversion to Arabic. CoRR abs\/1306.6755","DOI":"10.3115\/v1\/W14-3629"},{"key":"10_CR70","unstructured":"Khoja S, Garside R (2001) Automatic tagging of an Arabic corpus using APT. In: The Arabic linguistic symposium (ALS), University of Utah, Salt Lake City"},{"key":"10_CR71","unstructured":"Kiraz G (1998) Arabic computation morphology in the west. In: 6th international conference and exhibition on multi-lingual computing, Cambridge"},{"issue":"2","key":"10_CR72","doi-asserted-by":"publisher","first-page":"138","DOI":"10.1016\/j.lisr.2007.09.003","volume":"30","author":"K Kishida","year":"2008","unstructured":"Kishida K (2008) Prediction of performance of cross-language information retrieval using automatic evaluation of translation. Libr Inf Sci Res 30(2):138\u2013144","journal-title":"Libr Inf Sci Res"},{"issue":"2","key":"10_CR73","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1109\/34.748827","volume":"21","author":"T Kanungo","year":"1998","unstructured":"Kanungo T, Haralick R (1998) An automatic closed-loop methodology for generating character ground-truth for scanned documents. IEEE Trans Pattern Anal Mach Intell 21(2):179\u2013183","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10_CR74","doi-asserted-by":"crossref","unstructured":"Kanungo T, Haralick R, Phillips I (1993) Global and local document degradation models. In: 2nd international conference on document analysis and recognition (ICDAR\u201993), Tsukuba City, pp\u00a0730\u2013734","DOI":"10.1109\/ICDAR.1993.395633"},{"key":"10_CR75","volume-title":"Arabic OCR systems: state of the art","author":"T Kanungo","year":"1997","unstructured":"Kanungo T, Bulbul O, Marton G, Kim D (1997) Arabic OCR systems: state of the art. In: Symposium on document image understanding technology, Annapolis"},{"key":"10_CR76","doi-asserted-by":"crossref","unstructured":"Kanungo T, Marton G, Bulbul O (1999) OmniPage vs. Sakhr: paired model evaluation of two Arabic OCR products. In: SPIE conference on document recognition and retrieval (VI), San Jose","DOI":"10.1117\/12.335808"},{"issue":"3","key":"10_CR77","doi-asserted-by":"publisher","first-page":"633","DOI":"10.1016\/j.ipm.2005.06.006","volume":"42","author":"AM Lam-Adesina","year":"2006","unstructured":"Lam-Adesina AM, Jones GJF (2006) Examining and improving the effectiveness of relevance feedback for retrieval of scanned text documents. Inf Process Manage 42(3):633\u2013649","journal-title":"Inf Process Manage"},{"key":"10_CR78","doi-asserted-by":"crossref","unstructured":"Larkey LS, Ballesteros L, Connell ME (2002) Improving stemming for Arabic information retrieval: light stemming and co-occurrence analysis. Research and development in information retrieval \u2013 SIGIR-2002, Tampere, pp\u00a0275\u2013282","DOI":"10.1145\/564376.564425"},{"key":"10_CR79","doi-asserted-by":"crossref","unstructured":"Lee Y, Papineni K, Roukos S, Emam O, Hassan H (2003) Language model based Arabic word segmentation. In: Proceedings of the 41st annual meeting of the association for computational linguistics, Sapporo, July 2003, pp\u00a0399\u2013406","DOI":"10.3115\/1075096.1075147"},{"key":"10_CR80","doi-asserted-by":"crossref","unstructured":"Lee CJ, Chen CH, Kao SH, Cheng PJ (2010) To translate or not to translate? In: SIGIR-2010, Geneva","DOI":"10.1145\/1835449.1835558"},{"issue":"3","key":"10_CR81","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1016\/j.ipm.2004.06.012","volume":"41","author":"GA Levow","year":"2005","unstructured":"Levow GA, Oard DW, Resnik P (2005) Dictionary-based techniques for cross-language information retrieval. Inf Process Manage J 41(3):523\u2013547","journal-title":"Inf Process Manage J"},{"key":"10_CR82","first-page":"99","volume":"18","author":"Y Li","year":"1997","unstructured":"Li Y, Lopresti D, Tomkins A (1997) Validation of document defect models. IEEE Trans Pattern Anal Mach Intell 18:99\u2013107","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10_CR83","doi-asserted-by":"crossref","unstructured":"Lin WC, Chen HH (2003) Merging mechanisms in multilingual information retrieval. CLEF 2002, LNCS 2785. Springer, Berlin\/New York, pp 175\u2013186","DOI":"10.1007\/978-3-540-45237-9_14"},{"issue":"3","key":"10_CR84","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1561\/1500000016","volume":"3","author":"T-Y Liu","year":"2009","unstructured":"Liu T-Y (2009) Learning to rank for information retrieval. Found Trends Inf Retr 3(3):225\u2013331","journal-title":"Found Trends Inf Retr"},{"key":"10_CR85","first-page":"191","volume-title":"Using consensus sequence voting to correct OCR errors","author":"D Lopresti","year":"1994","unstructured":"Lopresti D, Zhou J (1994) Using consensus sequence voting to correct OCR errors. In: IAPR workshop on document analysis systems, Kaiserslautern, pp\u00a0191\u2013202"},{"key":"10_CR86","unstructured":"Lu Z, Bazzi I, Kornai A, Makhoul J, Natarajan P, Schwartz R (1999) A robust, language-independent OCR system. In: 27th AIPR workshop: advances in computer assisted recognition, Washington, DC. SPIE"},{"key":"10_CR87","unstructured":"Maamouri M, Graff D, Bouziri B, Krouna S, Bies A, Kulick S (2010) LDC standard Arabic morphological analyzer (SAMA) version 3.1. Linguistics Data Consortium, Catalog No. LDC2010L01"},{"key":"10_CR88","doi-asserted-by":"crossref","unstructured":"Magdy W, Darwish K (2006) Arabic OCR error correction using character segment correction, language modeling, and shallow morphology. In: Empirical methods in natural language processing (EMNLP\u201906), Sydney, pp\u00a0408\u2013414","DOI":"10.3115\/1610075.1610132"},{"key":"10_CR89","unstructured":"Magdy W, Darwish K, Rashwan M (2007) Fusion of multiple corrupted transmissions and its effect on information retrieval. In: ESOLE 2007, Cairo"},{"key":"10_CR90","doi-asserted-by":"crossref","unstructured":"Magdy W, Darwish K, El-Saban M (2009) Efficient language-independent retrieval of printed documents without OCR. In: SPIRE 2009, Saariselk\u00e4","DOI":"10.1007\/978-3-642-03784-9_33"},{"key":"10_CR91","volume-title":"Language processing for Arabic microblog retrieval","author":"W Magdy","year":"2012","unstructured":"Magdy W, Darwish K, Mourad A (2012) Language processing for Arabic microblog retrieval. In: CIKM, Maui"},{"key":"10_CR92","doi-asserted-by":"crossref","unstructured":"Mayfield J, McNamee P, Costello C, Piatko C, Banerjee A (2001) JHU\/APL at TREC 2001: experiments in filtering and in Arabic, video, and web retrieval. In: Text retrieval conference (TREC\u201901), Gaithersburg","DOI":"10.6028\/NIST.SP.500-250.apl-jhu"},{"key":"10_CR93","doi-asserted-by":"crossref","unstructured":"McNamee P, Mayfield J (2002) Comparing cross-language query expansion techniques by degrading translation resources. In: SIGIR\u201902, Tampere","DOI":"10.1145\/564376.564406"},{"issue":"5","key":"10_CR94","doi-asserted-by":"crossref","first-page":"735","DOI":"10.1016\/j.ipm.2004.05.001","volume":"40","author":"D Metzler","year":"2004","unstructured":"Metzler D, Croft WB (2004) Combining the language model and inference network approaches to retrieval. Inf Process Manage 40(5):735\u2013750. Special issue on Bayesian Networks and Information Retrieval","journal-title":"Inf Process Manage"},{"issue":"3","key":"10_CR95","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1023\/A:1026564708926","volume":"3","author":"E Mittendorf","year":"2000","unstructured":"Mittendorf E, Sch\u00e4uble P (2000) Information retrieval can cope with many errors. Inf Retr 3(3):189\u2013216. Springer, Netherlands","journal-title":"Inf Retr"},{"key":"10_CR96","volume-title":"A survey of multilingual text retrieval","author":"D Oard","year":"1996","unstructured":"Oard D, Dorr B (1996) A survey of multilingual text retrieval. UMIACS, University of Maryland, College Park"},{"key":"10_CR97","doi-asserted-by":"crossref","unstructured":"Oard D, Gey F (2002) The TREC 2002 Arabic\/English CLIR track. In: TREC-2002, Gaithersburg","DOI":"10.6028\/NIST.SP.500-251.xlingual-overview"},{"issue":"1","key":"10_CR98","first-page":"73","volume":"22","author":"K Oflazer","year":"1996","unstructured":"Oflazer K (1996) Error-tolerant finite state recognition with applications to morphological analysis and spelling correction. Comput Linguist 22(1):73\u201390","journal-title":"Comput Linguist"},{"key":"10_CR99","unstructured":"Page L (1998) Method for node ranking in a linked database. US patent no. 6285999"},{"key":"10_CR100","doi-asserted-by":"crossref","unstructured":"Pirkola A (1998) The effects of query structure and dictionary setups in dictionary-based cross-language information retrieval. In: SIGIR-1998, Melbourne, pp\u00a055\u201363","DOI":"10.1145\/290941.290957"},{"key":"10_CR101","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1002\/asi.4630270302","volume":"27","author":"SE Robertson","year":"1976","unstructured":"Robertson SE, Jones KS (1976) Relevance weighting of search terms. J Am Soc Inf Sci 27:129\u2013146","journal-title":"J Am Soc Inf Sci"},{"key":"10_CR102","unstructured":"Robertson SE, Jones KS (1996) Simple, proven approaches to text-retrieval. Technical report 356, Computer Laboratory, University of Cambridge, Cambridge"},{"issue":"4","key":"10_CR103","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1561\/1500000019","volume":"3","author":"SE Robertson","year":"2009","unstructured":"Robertson SE, Zaragoza H (2009) The probabilistic relevance framework: BM25 and beyond. Found Trends Inf Retr 3(4):333\u2013389","journal-title":"Found Trends Inf Retr"},{"key":"10_CR104","first-page":"343","volume":"4","author":"G Salton","year":"1969","unstructured":"Salton G, Lesk M (1969) Relevance assessments and retrieval system evaluation. Inf Storage Retr 4:343\u2013359","journal-title":"Inf Storage Retr"},{"key":"10_CR105","volume-title":"Introduction to modern information retrieval","author":"G Salton","year":"1983","unstructured":"Salton G, McGill M (1983) Introduction to modern information retrieval. McGraw-Hill, New\u00a0York"},{"issue":"11","key":"10_CR106","doi-asserted-by":"publisher","first-page":"613","DOI":"10.1145\/361219.361220","volume":"18","author":"G Salton","year":"1975","unstructured":"Salton G, Wong A, Yang CS (1975) A vector space model for automatic indexing. Commun ACM 18(11):613\u2013620","journal-title":"Commun ACM"},{"issue":"11","key":"10_CR107","doi-asserted-by":"publisher","first-page":"1022","DOI":"10.1145\/182.358466","volume":"26","author":"G Salton","year":"1983","unstructured":"Salton G, Fox EA, Wu H (1983) Extended Boolean information retrieval. Commun ACM 26(11):1022\u20131036","journal-title":"Commun ACM"},{"key":"10_CR108","doi-asserted-by":"crossref","unstructured":"Sanderson M (1994) Word sense disambiguation and information retrieval. In: SIGIR\u201994, Dublin, pp\u00a0142\u2013151","DOI":"10.1007\/978-1-4471-2099-5_15"},{"key":"10_CR109","doi-asserted-by":"crossref","unstructured":"Sanderson M, Joho H (2004) Forming test collections with no system pooling. In: SIGIR\u201904, Sheffield, 25\u201329 July 2004","DOI":"10.1145\/1008992.1009001"},{"key":"10_CR110","volume-title":"CLEF 2005: multilingual retrieval by combining multiple multilingual ranked lists","author":"L Si","year":"2005","unstructured":"Si L, Callan J (2005) CLEF 2005: multilingual retrieval by combining multiple multilingual ranked lists. In: Sixth workshop of the cross-language evaluation forum, CLEF, Vienna"},{"key":"10_CR111","doi-asserted-by":"crossref","unstructured":"Singhal A, Salton G, Buckley C (1996) Length normalization in degraded text collections. In: 5th annual symposium on document analysis and information retrieval, Las Vegas","DOI":"10.1016\/0306-4573(96)00008-8"},{"key":"10_CR112","volume-title":"An analysis of the effects of data corruption on text retrieval performance","author":"S Smith","year":"1990","unstructured":"Smith S (1990) An analysis of the effects of data corruption on text retrieval performance. Thinking Machines Corp, Cambridge"},{"key":"10_CR113","volume-title":"Ranking retrieval systems without relevance judgments","author":"I Soboroff","year":"2001","unstructured":"Soboroff I, Nicholas C, Cahan P (2001) Ranking retrieval systems without relevance judgments. In: SIGIR, New Orleans"},{"key":"10_CR114","volume-title":"Cross lingual and semantic retrieval for cultural heritage appreciation","author":"I Szpektor","year":"2007","unstructured":"Szpektor I, Dagan I, Lavie A, Shacham D, Wintner S (2007) Cross lingual and semantic retrieval for cultural heritage appreciation. In: Proceedings of the workshop on language technology for cultural heritage data, Prague"},{"key":"10_CR115","volume-title":"Results and implications of the noisy data projects, 1994","author":"K Taghva","year":"1994","unstructured":"Taghva K, Borasack J, Condit A, Gilbreth J (1994) Results and implications of the noisy data projects, 1994. Information Science Research Institute, University of Nevada, Las Vegas"},{"key":"10_CR116","volume-title":"An expert system for automatically correcting OCR output","author":"K Taghva","year":"1994","unstructured":"Taghva K, Borsack J, Condit A (1994) An expert system for automatically correcting OCR output. In: SPIE-document recognition, San Jose"},{"key":"10_CR117","volume-title":"Querying short OCR\u2019d documents","author":"K Taghva","year":"1995","unstructured":"Taghva K, Borasack J, Condit A, Inaparthy P (1995) Querying short OCR\u2019d documents. Information Science Research Institute, University of Nevada, Las Vegas"},{"key":"10_CR118","unstructured":"Tillenius M (1996) Efficient generation and ranking of spelling error corrections. NADA technical report TRITA-NA-E9621"},{"key":"10_CR119","volume-title":"A study of learning a merge model for multilingual information retrieval","author":"MF Tsai","year":"2008","unstructured":"Tsai MF, Wang YT, Chen HH (2008) A study of learning a merge model for multilingual information retrieval. In: SIGIR, Singapore"},{"key":"10_CR120","unstructured":"Tseng Y, Oard DW (2001) Document image retrieval techniques for Chinese. In: Symposium on document image understanding technology (SDIUT), Columbia, pp\u00a0151\u2013158"},{"key":"10_CR121","doi-asserted-by":"crossref","unstructured":"Udupa R, Saravanan K, Bakalov A, Bhole A (2009) \u201cThey Are Out There, If You Know Where to Look\u201d: mining transliterations of OOV query terms for cross-language information retrieval. In: ECIR, Toulouse. LNCS, vol\u00a05478, pp\u00a0437\u2013448","DOI":"10.1007\/978-3-642-00958-7_39"},{"key":"10_CR122","volume-title":"Variations in relevance judgments and the measurement of retrieval effectiveness","author":"E Voorhees","year":"1998","unstructured":"Voorhees E (1998) Variations in relevance judgments and the measurement of retrieval effectiveness. In: SIGIR, Melbourne"},{"key":"10_CR123","first-page":"202","volume-title":"Combining bidirectional translation and synonymy for cross-language information retrieval","author":"J Wang","year":"2006","unstructured":"Wang J, Oard DW (2006) Combining bidirectional translation and synonymy for cross-language information retrieval. In: SIGIR, Seattle, pp\u00a0202\u2013209"},{"key":"10_CR124","volume-title":"Detection & tracking: a case study in corpus creation & evaluation methodologies","author":"C Wayne","year":"1998","unstructured":"Wayne C (1998) Detection & tracking: a case study in corpus creation & evaluation methodologies. Language resources and evaluation conference, Granada"},{"key":"10_CR125","volume-title":"A study of using an out-of-box commercial MT system for query translation in CLIR","author":"D Wu","year":"2008","unstructured":"Wu D, He D, Ji H, Grishman R (2008) A study of using an out-of-box commercial MT system for query translation in CLIR. In: Workshop on improving non-English web searching, CIKM, Napa Valley"},{"key":"10_CR126","doi-asserted-by":"crossref","unstructured":"Yona S, Wintner S (2008) A finite-state morphological grammar of Hebrew. In: Proceedings of the ACL-2005 workshop on computational approaches to Semitic languages, Ann Arbor, June 2005","DOI":"10.3115\/1621787.1621790"}],"container-title":["Theory and Applications of Natural Language Processing","Natural Language Processing of Semitic Languages"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-45358-8_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T14:15:05Z","timestamp":1746195305000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-45358-8_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783642453571","9783642453588"],"references-count":126,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-45358-8_10","relation":{},"ISSN":["2192-032X","2192-0338"],"issn-type":[{"type":"print","value":"2192-032X"},{"type":"electronic","value":"2192-0338"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"25 March 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}