{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T05:21:24Z","timestamp":1772083284522,"version":"3.50.1"},"reference-count":101,"publisher":"Springer Science and Business Media LLC","issue":"2-3","license":[{"start":{"date-parts":[[2000,5,1]],"date-time":"2000-05-01T00:00:00Z","timestamp":957139200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2000,5,1]],"date-time":"2000-05-01T00:00:00Z","timestamp":957139200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Information Retrieval"],"published-print":{"date-parts":[[2000,5]]},"DOI":"10.1023\/a:1009950525500","type":"journal-article","created":{"date-parts":[[2002,12,23]],"date-time":"2002-12-23T00:25:11Z","timestamp":1040603111000},"page":"141-163","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":78,"title":["Information Retrieval from Documents: A Survey"],"prefix":"10.1007","volume":"2","author":[{"given":"M.","family":"Mitra","sequence":"first","affiliation":[]},{"given":"B.B.","family":"Chaudhuri","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"260689_CR1","unstructured":"Adar E and Hylton J (1995) On-the-fly hyperlink creation for page images. In: Second Annual Conference on the Theory and Practice of Digital Libraries. http:\/\/csdl.tamu.edu\/DL95\/papers\/adar\/adar.html (visited March 20th, 2000)."},{"key":"260689_CR2","doi-asserted-by":"crossref","unstructured":"Ballerini J, Buchel M, Domenig R, Knaus D, Mateev B, Mittendorf E, Schauble P, Sheridan P and Wechsler M (1997) SPIDER retrieval system at TREC-5. In: Voorhees E and Harman D (Eds.), The Fifth Text REtrieval Conference (TREC-5). NIST Special Publication 500\u2013238, pp. 217\u2013228.","DOI":"10.6028\/NIST.SP.500-238.confusion-ETH"},{"key":"260689_CR3","doi-asserted-by":"crossref","unstructured":"Ballesteros L and Croft W(1997) Phrasal translation and query expansion techniques for cross-language information retrieval. In: Belkin N, Narasimhalu A and Willett P (Eds.), Proceedings of the 20th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval. ACM Press, pp. 84\u201391.","DOI":"10.1145\/258525.258540"},{"key":"260689_CR4","doi-asserted-by":"crossref","unstructured":"Ballesteros L and Croft W (1998) Resolving ambiguity for cross-language information retrieval. In: Croft W, Moffat A and van Rijsbergen C (Eds.), Proceedings of the 21st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval. ACM Press, pp. 64\u201371.","DOI":"10.1145\/290941.290958"},{"key":"260689_CR5","unstructured":"Belkin N and Croft W(1987) Retrieval Techniques. In:Williams M(Ed.), Annual Review of Information Science and Technology. Elsevier Science, pp. 109\u2013145."},{"key":"260689_CR6","doi-asserted-by":"crossref","unstructured":"Bloomberg D and Chen F (1996) Extraction of text-related features for condensing image documents. In: Vincent L and Hull J. (Eds.), Proceedings of the SPIE\u2013Document Recognition III. The International Society for Optical Engineering (SPIE), Vol. 2660, pp. 72\u201388. http:\/\/www.parc.xerox.com.istl\/members\/bloomberg\/ spie96dimsum.pdf (visited March 20, 2000).","DOI":"10.1117\/12.234726"},{"key":"260689_CR7","first-page":"67","volume-title":"Symposium on Document Analysis and Information Retrieval","author":"T Bruckner","year":"1995","unstructured":"Bruckner T, Suda P, Block H and Maderlechner G (1995) Inhouse mail distribution by autoamtic address and content interpretation. In: Symposium on Document Analysis and Information Retrieval. Information Science Research Institute, University of Nevada, Las Vegas, pp. 67\u201375."},{"key":"260689_CR8","doi-asserted-by":"crossref","unstructured":"Buckley C, Salton G, Allan J and Singhal A (1995) Automatic query expansion using SMART: TREC-3. In: Harman D (Ed.), The Third Text REtrieval Conference (TREC-3). NIST Special Publication 500\u2013225.","DOI":"10.6028\/NIST.SP.500-225.routing-cornell"},{"key":"260689_CR9","doi-asserted-by":"crossref","unstructured":"Buckley C, Singhal A and Mitra M (1997) Using query zoning and correlation within SMART: TREC5. In: Voorhees E and Harman D (Eds.), The Fifth Text REtrieval Conference (TREC-5). NIST Special Publication 500\u2013238, pp. 105\u2013118.","DOI":"10.6028\/NIST.SP.500-238.routing-Cornell"},{"key":"260689_CR10","unstructured":"Buckley C, Singhal A, MitraMand Salton G(1996) Newretrieval approaches usingSMART: TREC-4. In: Harman D (Ed.), The Fourth Text REtrieval Conference (TREC-4). NIST Special Publication 500\u2013236, pp. 25\u201348."},{"key":"260689_CR11","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1007\/BF02626994","volume":"5","author":"R Casey","year":"1992","unstructured":"Casey R, Ferguson D, Mohiuddin K and Walach E (1992) Intelligent form processing system. Machine Vision and Applications, 5:143\u2013155.","journal-title":"Machine Vision and Applications"},{"key":"260689_CR12","first-page":"163","volume-title":"Symposium on Document Analysis and Information Retrieval","author":"F Chen","year":"1996","unstructured":"Chen F and Bloomberg D(1996) Extraction of thematically relevant text from images. In: Symposium on Document Analysis and Information Retrieval. Information Science Research Institute, University of Nevada, Las Vegas, pp. 163\u2013178."},{"key":"260689_CR13","doi-asserted-by":"crossref","unstructured":"Chen F and Bloomberg D(1997) Extraction of indicative summary sentences from imaged documents. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 227\u2013232.","DOI":"10.1109\/ICDAR.1997.619846"},{"key":"260689_CR14","doi-asserted-by":"crossref","unstructured":"Chen F and Bloomberg D (1998) Summarization of imaged documents without OCR. Computer Vision and Image Understanding, 70(3).","DOI":"10.1006\/cviu.1998.0688"},{"key":"260689_CR15","doi-asserted-by":"crossref","unstructured":"Chen F, Bloomberg D and Wilcox L (1995) Spotting phrases in lines of imaged text. In: Vincent L and Baird H (Eds.), Proceedings of the SPIE\u2013Document Recognition II. The International Society for Optical Engineering (SPIE), Vol. 2422, pp. 256\u2013269.","DOI":"10.1117\/12.205828"},{"key":"260689_CR16","unstructured":"Chen F, Wilcox L and Bloomberg D(1993a) Detecting and locating partially specified keywords in scanned images using hidden Markov models. In: Proceedings of the Second International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 133\u2013138."},{"key":"260689_CR17","first-page":"1","volume":"5","author":"F Chen","year":"1993","unstructured":"Chen F, Wilcox L and Bloomberg D (1993b) Word spotting in scanned images using hidden Markov models. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE Computer Society Press, Vol. 5, pp. 1\u20134.","journal-title":"Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE Computer Society Press"},{"key":"260689_CR18","unstructured":"Croft W, Harding S, Taghva K and Borsack J (1994) An evaluation of information retrieval accuracy with simulated OCR Output. In: Symposium on Document Analysis and Information Retrieval, pp. 115\u2013126. http:\/\/cobar.cs.umass.edu\/pubfiles\/ocr.ps.gz (visited March 20th, 2000)."},{"issue":"4","key":"260689_CR19","doi-asserted-by":"crossref","first-page":"285","DOI":"10.1108\/eb026683","volume":"35","author":"W Croft","year":"1979","unstructured":"Croft W and Harper D (1979) Using probabilistic models of document retrieval without relevance information.Documentation, 35(4):285\u2013295.","journal-title":"Documentation"},{"key":"260689_CR20","doi-asserted-by":"crossref","unstructured":"Cullen J, Hull J and Hart P (1997) Document image database retrieval and browsing using texture analysis. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 718\u2013721.","DOI":"10.1109\/ICDAR.1997.620602"},{"key":"260689_CR21","unstructured":"Decurtins J (1997) Comparison of OCR vs. word shape recognition for keyword spotting. In: Symposium on Document Image Understanding Technology, pp. 205\u2013213."},{"key":"260689_CR22","doi-asserted-by":"crossref","unstructured":"DeCurtins J and Chen E (1995) Keyword spotting via word shape recognition. In: Vincent L and Baird H (Eds.), Proceedings of the SPIE\u2013Document Recognition II. The International Society for Optical Engineering (SPIE), Vol. 2422, pp. 270\u2013277.","DOI":"10.1117\/12.205829"},{"issue":"2","key":"260689_CR23","doi-asserted-by":"crossref","first-page":"311","DOI":"10.1016\/0031-3203(94)90062-0","volume":"27","author":"G DeSilva","year":"1994","unstructured":"DeSilva G and Hull J (1994) Proper noun detection in document images. Pattern Recognition, 27(2):311\u2013320.","journal-title":"Pattern Recognition"},{"key":"260689_CR24","doi-asserted-by":"crossref","unstructured":"Doermann D (1997) The retrieval of document images: A brief survey. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 945\u2013949.","DOI":"10.1109\/ICDAR.1997.620650"},{"issue":"3","key":"260689_CR25","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1006\/cviu.1998.0692","volume":"70","author":"D Doermann","year":"1998","unstructured":"Doermann D (1998) The indexing and retrieval of document images: A survey. Computer Vision and Image Understanding, 70(3):287\u2013298.","journal-title":"Computer Vision and Image Understanding"},{"key":"260689_CR26","doi-asserted-by":"crossref","unstructured":"Doermann D, Li H and Kia O (1997) The detection of duplicates in document image databases. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 314\u2013318.","DOI":"10.1109\/ICDAR.1997.619863"},{"issue":"2","key":"260689_CR27","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1007\/BF01214362","volume":"9","author":"D Doermann","year":"1996","unstructured":"Doermann D, Rivlin E and Weiss I (1996) Applying algebraic and differential invariants for logo recognition. Machine Vision and Applications, 9(2):73\u201386.","journal-title":"Machine Vision and Applications"},{"key":"260689_CR28","unstructured":"Efthimiadis E and Biron P (1994) UCLA-Okapi at TREC-2: Query Expansion Experiments. In: The Second Text REtrieval Conference (TREC-2). NIST Special Publication 500\u2013215, pp. 279\u2013290. http:\/\/trec.nist.gov\/ pubs\/trec2\/papers\/txt\/28.txt (visited March 20th, 2000)."},{"issue":"3","key":"260689_CR29","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1016\/0306-4573(94)00054-7","volume":"31","author":"DA Evans","year":"1995","unstructured":"Evans DA and Lefferts RG (1995) CLARIT-TREC experiments. Information Processing and Management, 31(3):385\u2013395.","journal-title":"Information Processing and Management"},{"issue":"9","key":"260689_CR30","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1109\/2.410146","volume":"28","author":"M Flickner","year":"1995","unstructured":"Flickner M, Sawhney H, Niblack W, Ashley J, Huang Q, Dom B, Gorkani M, Hafner J, Lee D, Petkovic D, Steele D and Yanker P (1995) Query by Image and Video Content: the QBIC System. IEEE Computer, 28(9):23\u201332.","journal-title":"IEEE Computer"},{"key":"260689_CR31","unstructured":"Fox E, Betrabet S, Koushik M and Lee W (1992) Extended Boolean models. In: Frakes W and Baeza-Yates R (Eds.), Information Retrieval Data Structures and Algorithms, Prentice Hall, pp. 393\u2013418."},{"key":"260689_CR32","doi-asserted-by":"crossref","unstructured":"Gudivada V and Raghavan V, Eds. (1995) Special issue on content-based image retrieval systems, IEEE Computer Society Press. IEEE Computer, 28(9).","DOI":"10.1109\/2.410145"},{"key":"260689_CR33","doi-asserted-by":"crossref","unstructured":"Harding S, Croft W and Weir C (1997) Probabilistic retrieval of OCR degraded text using N-grams. In: Peters C and Thanos C (Eds.), Research and Advanced Technology for Digital Libraries, pp. 345\u2013359.","DOI":"10.1007\/BFb0026737"},{"key":"260689_CR34","doi-asserted-by":"crossref","unstructured":"Harman D (1996) Overview of the fourth Text REtrieval conference (TREC-4). In: Harman D (Ed.), The Fourth Text REtrieval Conference (TREC-4). NIST Special Publication 500\u2013236, pp. 1\u201323.","DOI":"10.6028\/NIST.SP.500-236.overview-overview"},{"key":"260689_CR35","doi-asserted-by":"crossref","first-page":"821","DOI":"10.1016\/0167-8655(92)90133-K","volume":"13","author":"T Ho","year":"1992","unstructured":"Ho T, Hull J and Srihari S (1992) A word shape analysis approach to lexicon based word recognition. Pattern Recognition Letters, 13:821\u2013826.","journal-title":"Pattern Recognition Letters"},{"key":"260689_CR36","unstructured":"Huang J (1998) Color-spatial image indexing and applications. PhD Thesis, Department of Computer Science, Cornell University. http:\/\/www.cs.cornell.edu\/Info\/People\/huang\/thesis.pdf (visited March 20th, 2000)."},{"key":"260689_CR37","doi-asserted-by":"crossref","unstructured":"Huang J, Kumar S, Mitra M, Zhu W and Zabih R (1997) Image indexing using color correlograms. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR). IEEE Computer Society Press, pp. 762\u2013768.","DOI":"10.1109\/CVPR.1997.609412"},{"key":"260689_CR38","doi-asserted-by":"crossref","unstructured":"Hull D and Grefenstette G (1996) Querying across languages: a dictionary-based approach to multilingual information retrieval. In: Frei H, Harman D, Schauble P and Wilkinson R (Eds.), Proceedings of the 19th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval. ACM Press, pp. 49\u201357.","DOI":"10.1145\/243199.243212"},{"key":"260689_CR39","doi-asserted-by":"crossref","unstructured":"Hull D, Grefenstette G, Schulze B, Gaussier E, Schutze H and Pedersen J (1997) Xerox TREC-5 site report: routing, filtering, NLP, and Spanish tracks. In: Voorhees E and Harman D (Eds.), The Fifth Text REtrieval Conference (TREC-5). NIST Special Publication 500\u2013238, pp. 167\u2013180.","DOI":"10.6028\/NIST.SP.500-238.Spanish-Xerox"},{"key":"260689_CR40","unstructured":"Hull J (1992a)Ahidden Markov model for language syntax in text recognition. In: Proceedings of the International Conference on Pattern Recognition. IEEE Computer Society Press."},{"key":"260689_CR41","unstructured":"Hull J (1992b) Incorporation of a Markov model of language syntax in a text recognition algorithm. In: O'Gorman L and Kasturi R (Eds.), Document Image Analysis. IEEE Computer Society Press, pp. 287\u2013297."},{"key":"260689_CR42","doi-asserted-by":"crossref","unstructured":"Hull J and Cullen J (1997) Document image similarity and equivalence detection. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 308\u2013312.","DOI":"10.1109\/ICDAR.1997.619862"},{"issue":"9","key":"260689_CR43","doi-asserted-by":"crossref","first-page":"928","DOI":"10.1109\/34.161351","volume":"14","author":"S Joseph","year":"1992","unstructured":"Joseph S and Pridmore T (1992) Knowledge-directed interpretation of mechanical engineering drawings. IEEE Transactions on Pattern Analysis and Machine Intelligence, 14(9):928\u2013940.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"260689_CR44","doi-asserted-by":"crossref","unstructured":"Kantor PB and Voorhees EM (1997) Report on the TREC-5 confusion track. In: Voorhees E and Harman D (Eds.), The Fifth Text REtrieval Conference (TREC-5). NIST Special Publication 500\u2013238.","DOI":"10.6028\/NIST.SP.500-238"},{"issue":"2","key":"260689_CR45","first-page":"134","volume":"12","author":"T Kato","year":"1991","unstructured":"Kato T, Kurita Tand Shimogaki H(1991) Intelligent visual interaction with image databases. Journal of Information Processing of Japan, 12(2):134\u2013143.","journal-title":"Journal of Information Processing of Japan"},{"key":"260689_CR46","doi-asserted-by":"crossref","unstructured":"Kelledy F and Smeaton A(1997) TREC-5 experiments at Dublin City University: query space reduction, Spanish & character shape encoding. In: Voorhees E and Harman D (Eds.), The Fifth Text REtrieval Conference (TREC-5). NIST Special Publication 500\u2013238, pp. 197\u2013207.","DOI":"10.6028\/NIST.SP.500-238.Dublin"},{"issue":"1","key":"260689_CR47","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1006\/cviu.1996.0005","volume":"63","author":"S Khoubyari","year":"1996","unstructured":"Khoubyari S and Hull J (1996) Font and function word identification in document recognition. Computer Vision and Image Understanding, 63(1):66\u201374.","journal-title":"Computer Vision and Image Understanding"},{"key":"260689_CR48","doi-asserted-by":"crossref","unstructured":"Kia O and Doermann D (1996) Structural compression for document analysis. In: Proceedings of the International Conference on Pattern Recognition. IEEE Computer Society Press, pp. 664\u2013668.","DOI":"10.1109\/ICPR.1996.547029"},{"issue":"8","key":"260689_CR49","doi-asserted-by":"crossref","first-page":"842","DOI":"10.1109\/34.308482","volume":"16","author":"S Kuo","year":"1994","unstructured":"Kuo S and Agazzi O (1994) Keyword spotting in poorly printed documents using pseudo-2D hidden Markov models. IEEE Transactions on Pattern Analysis and Machine Intelligence, 16(8):842\u2013848.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"260689_CR50","unstructured":"Lagoze C, Shaw E, Davis J and Krafft D (1995) Dienst: implementation reference manual. Technical Report 95\u20131514, Dept. of Computer Science, Cornell University. http:\/\/estr.cs.cornell.edu:80\/Dienst\/UI\/1.0\/Display\/ ncstrl.cornell\/TR95\u20131514 (visited March 20th, 2000)."},{"issue":"7","key":"260689_CR51","doi-asserted-by":"crossref","first-page":"722","DOI":"10.1109\/34.506794","volume":"18","author":"F Liu","year":"1996","unstructured":"Liu F and Picard R (1996) Periodicity, directionality and randomness: wold features for image modeling and retrieval. IEEE Transactions on Pattern Analysis and Machine Intelligence, 18(7):722\u2013733.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"260689_CR52","first-page":"291","volume-title":"Symposium on Document Analysis and Information Retrieval","author":"O Lorenz","year":"1995","unstructured":"Lorenz O and Monagan G (1995) A retrieval system for graphical documents. In: Symposium on Document Analysis and Information Retrieval. Information Science Research Institute, University of Nevada, Las Vegas, pp. 291\u2013300."},{"key":"260689_CR53","volume-title":"NETRA: A toolbox for navigating large image databases","author":"W Ma","year":"1997","unstructured":"Ma W (1997) NETRA: A toolbox for navigating large image databases. PhD Thesis, Department of Electrical and Computer Engineering, University of California, Santa Barbara. http:\/\/vivaldi.ece.ucsb.edu\/users\/wei\/ mypapers\/thesis.html (visited March 20th, 2000)."},{"key":"260689_CR54","doi-asserted-by":"crossref","unstructured":"Manjunath B and Ma W (1996) Texture features for browsing and retrieval of image data. IEEE Transactions on Pattern Analysis and Machine Intelligence, 18(11), Special issue on digital libraries.","DOI":"10.1109\/34.531803"},{"key":"260689_CR55","unstructured":"Manmatha R (1997) Multimedia indexing and retrieval research at the Center for Intelligent Information Retrieval. In: Symposium on Document Image Understanding Technology, pp. 16\u201330."},{"key":"260689_CR56","unstructured":"Manmatha R and Croft W(1998)Word spotting: indexing handwritten archives. In: Maybury M (Ed.), Intelligent Multi-media Information Retrieval. AAAI\/MIT Press."},{"key":"260689_CR57","doi-asserted-by":"crossref","unstructured":"Manmatha R, Han C and Riseman E (1996a)Word spotting: a new approach to indexing handwriting. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR). IEEE Computer Society Press, pp. 631\u2013637.","DOI":"10.1109\/CVPR.1996.517139"},{"key":"260689_CR58","doi-asserted-by":"crossref","unstructured":"Manmatha R, Han C, Riseman E and Croft W (1996b) Indexing handwriting using word matching. In: DL '96: Proceedings of the 1st ACM International Conference on Digital libraries, pp. 151\u2013159.","DOI":"10.1145\/226931.226960"},{"key":"260689_CR59","unstructured":"Mitra M, Buckley C, Singhal A and Cardie C (1997) An analysis of statistical and syntactic phrases. In: Proceedings of the 5th RIAO Conference on Computer-Assisted Research of Information, pp. 200\u2013214. http:\/\/www.research.att.com\/\u00bbsinghal\/riao97.ps (visited March 20th, 2000)."},{"key":"260689_CR60","unstructured":"MUC-6 (1995) In: Proceedings of the Sixth Message Understanding Conference (MUC-6). Defence Advanced Research Projects Agency, Morgan Kaufmann."},{"key":"260689_CR61","doi-asserted-by":"crossref","unstructured":"Myka A and Guntzer U (1997) Measuring the effects of OCR errors on similarity linking. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 968\u2013973.","DOI":"10.1109\/ICDAR.1997.620654"},{"key":"260689_CR62","unstructured":"Oard D (1996) Adaptive vector space text filtering for monolingual and cross-language applications. PhD Thesis, University of Maryland. http:\/\/www.clis.umd.edu\/dlrg\/filter\/papers\/thesis.final.ps (visited March 20th, 2000)."},{"key":"260689_CR63","doi-asserted-by":"crossref","unstructured":"Ohta M, Takasu A and Adachi J (1997) Retrieval methods for english text with misrecognized characters. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 950\u2013956.","DOI":"10.1109\/ICDAR.1997.620651"},{"key":"260689_CR64","doi-asserted-by":"crossref","unstructured":"Park I, Yun I and Lee S (1997) Models and algorithms for efficient color image indexing. In: Proceedings of the IEEE Workshop on Content-Based Access of Image and Video Libraries. IEEE Computer Society Press, pp. 36\u201349.","DOI":"10.1109\/IVL.1997.629718"},{"issue":"3","key":"260689_CR65","doi-asserted-by":"crossref","first-page":"234","DOI":"10.1007\/s005300050125","volume":"7","author":"G Pass","year":"1999","unstructured":"Pass G and Zabih R (1999) Comparing images using joint histograms. ACM Journal of Multimedia Systems, 7(3):234\u2013240.","journal-title":"ACM Journal of Multimedia Systems"},{"issue":"3","key":"260689_CR66","doi-asserted-by":"crossref","first-page":"129","DOI":"10.1002\/asi.4630270302","volume":"27","author":"S Robertson","year":"1976","unstructured":"Robertson S and Sparck Jones K (1976) Relevance weighting of search terms. Journal of the American Society for Information Science, 27(3):129\u2013146.","journal-title":"Journal of the American Society for Information Science"},{"key":"260689_CR67","doi-asserted-by":"crossref","unstructured":"Robertson S and Walker S (1994) Some simple effective approximations to the 2\u2013poisson model for probabilistic weighted retrieval. In: Croft Wand van Rijsbergen C (Eds.), Proceedings of the 17th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, Springer-Verlag, pp. 232\u2013241.","DOI":"10.1007\/978-1-4471-2099-5_24"},{"key":"260689_CR68","doi-asserted-by":"crossref","unstructured":"Robertson S, Walker S, Jones S, Hancock-Beaulieu M and Gatford M (1995) Okapi at TREC-3. In: Harman D (Ed.), The Third Text REtrieval Conference (TREC-3). NIST Special Publication 500\u2013225.","DOI":"10.6028\/NIST.SP.500-225.routing-city"},{"key":"260689_CR69","unstructured":"Salton G (1972) Experiments in multi-lingual information retrieval. Technical Report 72\u2013154. Dept. of Computer Science, Cornell University. http:\/\/cstr.cs.cornell.edu:80\/Dienst\/UI\/1.0\/Display\/ncstrl.cornell\/TR72\u2013154 (visited March 20th, 2000)."},{"issue":"2","key":"260689_CR70","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1145\/1095425.1095427","volume":"16","author":"G Salton","year":"1981","unstructured":"Salton G (1981) A blueprint for automatic indexing. ACM SIGIR Forum, 16(2):22\u201338.","journal-title":"ACM SIGIR Forum"},{"key":"260689_CR71","unstructured":"Salton G(1989) Automatic text processing\u2013the transformation, analysis and retrieval of information by computer. Addison-Wesley Publishing Co., Reading, MA."},{"issue":"5","key":"260689_CR72","doi-asserted-by":"crossref","first-page":"513","DOI":"10.1016\/0306-4573(88)90021-0","volume":"24","author":"G Salton","year":"1988","unstructured":"Salton G and Buckley C (1988) Term-weighting approaches in automatic text retrieval. Information Processing and Management, 24(5):513\u2013523.","journal-title":"Information Processing and Management"},{"key":"260689_CR73","volume-title":"Introduction to Modern Information Retrieval","author":"G Salton","year":"1983","unstructured":"Salton G and McGill M(1983) Introduction to Modern Information Retrieval. McGraw Hill Book Co., New York."},{"issue":"11","key":"260689_CR74","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1145\/361219.361220","volume":"18","author":"G Salton","year":"1975","unstructured":"Salton G, Wong A and Yang C (1975) A vector space model for information retrieval. Communications of the ACM, 18(11):613\u2013620.","journal-title":"Communications of the ACM"},{"issue":"8","key":"260689_CR75","doi-asserted-by":"crossref","first-page":"783","DOI":"10.1109\/34.531799","volume":"18","author":"H Samet","year":"1996","unstructured":"Samet H and Soffer A (1996) MARCO: map retrieval by content. IEEE Transactions on Pattern Analysis and Machine Intelligence, 18(8):783\u2013798.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"260689_CR76","doi-asserted-by":"crossref","unstructured":"Schauble P and Sheridan P (1997) Cross-language information retrieval (CLIR) track overview. In: Voorhees E and Harman D (Eds.), The Sixth Text REtrieval Conference (TREC-6). NIST Special Publication 500\u2013240, pp. 31\u201343.","DOI":"10.6028\/NIST.SP.500-240.clir-overview"},{"key":"260689_CR77","doi-asserted-by":"crossref","unstructured":"Sheridan P and Ballerini J (1996) Experiments in multilingual information retrieval using the SPIDER system. In: Frei H, Harman D, Schauble P and Wilkinson R (Eds.), Proceedings of the 19th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval. ACM Press, pp. 58\u201365.","DOI":"10.1145\/243199.243213"},{"key":"260689_CR78","unstructured":"Singhal A (1998) Question answering track at TREC-8. http:\/\/www.research.att.com\/\u00bbsinghal\/qa-track.html (visited March 20th, 2000)."},{"key":"260689_CR79","doi-asserted-by":"crossref","unstructured":"Singhal A, Buckley C and Mitra M (1996a) Pivoted document length normalization. In: Frei H, Harman D, Schauble P and Wilkinson R (Eds.), Proceedings of the 19th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval. ACM Press, pp. 21\u201329.","DOI":"10.1145\/243199.243206"},{"key":"260689_CR80","unstructured":"Singhal A, Salton G and Buckley C (1996b) Length normalization in degraded text collections. In: Symposium on Document Analysis and Information Retrieval, pp. 149\u2013162. http:\/\/www.research.att.com\/\u00bbsinghal\/ ocr-norm.ps (visited March 20th, 2000)."},{"key":"260689_CR81","unstructured":"Smeaton A and O'Connor J (1998) User-mediated word shape tokens for querying document images. In: Kay J and Milosavljevic M (Eds.), Proceedings of the Third Australian Document Computing Symposium. http:\/\/www.compapp.dcu.ie\/\u00bbasmeaton\/pubs\/ADCS98\u2013crc.ps.Z (visited March 20th, 2000)."},{"key":"260689_CR82","doi-asserted-by":"crossref","unstructured":"Smeaton A and Spitz A (1997) Using character shape coding for information retrieval. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 974\u2013978.","DOI":"10.1109\/ICDAR.1997.620655"},{"key":"260689_CR83","doi-asserted-by":"crossref","unstructured":"Soffer A(1997) Image categorization using texture features. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 233\u2013237.","DOI":"10.1109\/ICDAR.1997.619847"},{"key":"260689_CR84","first-page":"382","volume-title":"Shape, Structure and Pattern Recognition","author":"A Spitz","year":"1995","unstructured":"Spitz A (1995) Using character shape codes for word spotting in document images. In: Dori D and Bruckstein A (Eds.), Shape, Structure and Pattern Recognition, World Scientific, Singapore, pp. 382\u2013389."},{"issue":"9","key":"260689_CR85","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1109\/2.410153","volume":"28","author":"R Srihari","year":"1995","unstructured":"Srihari R (1995) Automatic indexing and content-based retrieval of captioned images. IEEE Computer, 28(9):49\u201356.","journal-title":"IEEE Computer"},{"key":"260689_CR86","doi-asserted-by":"crossref","unstructured":"Stricker M and Dimai A (1996) Color indexing with weak spatial constraints. In: Sethi I and Jain R (Eds.), Proceedings of the SPIE. The International Society for Optical Engineering (SPIE), Vol. 2670, pp. 29\u201340.","DOI":"10.1117\/12.234802"},{"key":"260689_CR87","doi-asserted-by":"crossref","unstructured":"Strzalkowski T, Lin F and Perez-Carballo J (1998) Natural language information retrieval TREC-6 report. In: Voorhees E and Harman D (Eds.), The Sixth Text REtrieval Conference (TREC-6). NIST Special Publication 500\u2013240, pp. 347\u2013366.","DOI":"10.6028\/NIST.SP.500-240.nlp-GE"},{"key":"260689_CR88","doi-asserted-by":"crossref","unstructured":"Suda P, Bridoux C, Kammerer B and Maderlechner G (1997) Logo and word matching using a general approach to signal registration. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 61\u201365.","DOI":"10.1109\/ICDAR.1997.619814"},{"issue":"1","key":"260689_CR89","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1007\/BF00130487","volume":"7","author":"M Swain","year":"1991","unstructured":"Swain M and Ballard D (1991) Color indexing. International Journal of Computer Vision, 7(1):11\u201332.","journal-title":"International Journal of Computer Vision"},{"key":"260689_CR90","doi-asserted-by":"crossref","unstructured":"Taghva K, Borsack J and Condit A (1994) Expert system for automatically correcting OCR output. In: Vincent L and Pavlidis T (Eds.), Proceedings of the SPIE\u2013Document Recognition. The International Society for Optical Engineering (SPIE), Vol. 2181, p. 270\u2013278.","DOI":"10.1117\/12.171114"},{"key":"260689_CR91","doi-asserted-by":"crossref","unstructured":"Taghva K, Borsack J and Condit A (1997) Information retrieval and OCR. In: Bunke H and Wang P (Eds.), Handbook of Character Recognition and Document Image Analysis,World Scientific Publishing Co., pp. 755\u2013777.","DOI":"10.1142\/9789812830968_0029"},{"key":"260689_CR92","doi-asserted-by":"crossref","unstructured":"Takasu A (1997) An approximate string match for garbled text with various accuracy. In: Proceedings of the Fourth International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 957\u2013961.","DOI":"10.1109\/ICDAR.1997.620652"},{"key":"260689_CR93","unstructured":"Tanaka Y and Torii H (1988) Transmedia machine and its keyword search over image texts. In: Proceedings of the 2nd RIAO Conference on Computer-assisted Research of Information, pp. 248\u2013258."},{"key":"260689_CR94","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/BF02626999","volume":"5","author":"S Taylor","year":"1992","unstructured":"Taylor S, Fritzson R and Pastor J (1992) Intelligent form processing system. Machine Vision and Applications, 5:211\u2013222.","journal-title":"Machine Vision and Applications"},{"key":"260689_CR95","doi-asserted-by":"crossref","unstructured":"Tong X, Zhai C, Milic-Frayling N and Evans D (1997) OCR correction and query expansion for retrieval on OCR data\u2013CLARIT TREC-5 confusion track report. In: Voorhees E and Harman D (Eds.), The Fifth Text REtrieval Conference (TREC-5). NIST Special Publication 500\u2013238, pp. 341\u2013345.","DOI":"10.6028\/NIST.SP.500-238.confusion-CLARITECH"},{"key":"260689_CR96","doi-asserted-by":"crossref","unstructured":"Tsuda K, Senda S, Minoh Mand Ikeda K (1995) Clustering OCR-ed texts for browsing document image database. In: Proceedings of the Third International Conference on Document Analysis and Recognition. IEEE Computer Society Press, pp. 171\u2013174.","DOI":"10.1109\/ICDAR.1995.598969"},{"issue":"7","key":"260689_CR97","doi-asserted-by":"crossref","first-page":"46","DOI":"10.1109\/2.144439","volume":"25","author":"P Vaxiviere","year":"1992","unstructured":"Vaxiviere P and Tombre K (1992) Celesstin: CAD conversion of mechanical drawings. IEEE Computer, 25(7):46\u201354.","journal-title":"IEEE Computer"},{"key":"260689_CR98","doi-asserted-by":"crossref","unstructured":"Voorhees E (1994) Query expansion using lexical-semantic relations. In: Croft W and van Rijsbergen C (Eds.), Proceedings of the 17th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, Springer-Verlag, pp. 61\u201369.","DOI":"10.1007\/978-1-4471-2099-5_7"},{"key":"260689_CR99","doi-asserted-by":"crossref","unstructured":"Voorhees E and Harman D (1998) Overview of the Sixth Text REtrieval conference (TREC-6). In: Voorhees E and Harman D (Eds.), The Sixth Text REtrieval Conference (TREC-6). NIST Special Publication 500\u2013240, pp. 1\u201324.","DOI":"10.6028\/NIST.SP.500-240"},{"key":"260689_CR100","unstructured":"Wartik S (1992) Boolean operations. In: Frakes Wand Baeza-Yates R (Eds.), Information Retrieval Data Strucutres and Algorithms. Prentice Hall, pp. 264\u2013292."},{"key":"260689_CR101","doi-asserted-by":"crossref","unstructured":"Xu J and Croft W (1996) Query expansion using local and global document analysis. In: Frei H, Harman D, Schauble P and Wilkinson R (Eds.), Proceedings of the 19th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, ACM Press, pp. 4\u201311.","DOI":"10.1145\/243199.243202"}],"container-title":["Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1009950525500.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1009950525500\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1009950525500.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T12:20:56Z","timestamp":1748348456000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1009950525500"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2000,5]]},"references-count":101,"journal-issue":{"issue":"2-3","published-print":{"date-parts":[[2000,5]]}},"alternative-id":["260689"],"URL":"https:\/\/doi.org\/10.1023\/a:1009950525500","relation":{},"ISSN":["1386-4564","1573-7659"],"issn-type":[{"value":"1386-4564","type":"print"},{"value":"1573-7659","type":"electronic"}],"subject":[],"published":{"date-parts":[[2000,5]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}