{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T07:45:27Z","timestamp":1772264727430,"version":"3.50.1"},"reference-count":71,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2006,3,14]],"date-time":"2006-03-14T00:00:00Z","timestamp":1142294400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2006,5]]},"DOI":"10.1007\/s00138-006-0017-3","type":"journal-article","created":{"date-parts":[[2006,3,13]],"date-time":"2006-03-13T09:39:58Z","timestamp":1142242798000},"page":"94-115","source":"Crossref","is-referenced-by-count":60,"title":["Ontological inference for image and video analysis"],"prefix":"10.1007","volume":"17","author":[{"given":"Christopher","family":"Town","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2006,3,14]]},"reference":[{"key":"17_CR1","unstructured":"Abella, A.: From imagery to salience: Locative expressions in context. Ph.D. Thesis, University of Columbia (1995)"},{"key":"17_CR2","unstructured":"Abella, A., Kender, J.: From pictures to words: Generating locative descriptions of objects in an image. In: ARPA94, pp II:909\u2013918 (1994)"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Barnard, K., Duygulu, P., Forsyth, D.: Clustering art. In: Proceedings of the Conference on Computer Vision and Pattern Recognition (2001)","DOI":"10.1109\/CVPR.2001.990994"},{"key":"17_CR4","first-page":"1107","volume":"3","author":"K. Barnard","year":"2003","unstructured":"Barnard, K., Duygulu, P., Forsyth, D., Freitas, N., Blei, D., Jordan, M.: Matching words and pictures. J. Mach. Learn. Res. 
3, 1107\u20131135 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Barnard, K., Forsyth, D.: Learning the semantics of words and pictures. In: Proceedings of the International Conference on Computer Vision (2001)","DOI":"10.1109\/ICCV.2001.937654"},{"key":"17_CR6","unstructured":"Bobick, A., Richards, W.: Classifying objects from visual information. Technical Report, MIT AI Lab (1986)"},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"Bunke, H., Pasche, D.: Parsing multivalued strings and its application to image and waveform recognition, structural pattern analysis. World Scientific Publishing, Singapore (1990)","DOI":"10.1142\/9789814368292_0001"},{"issue":"4","key":"17_CR8","doi-asserted-by":"crossref","first-page":"247","DOI":"10.1016\/0262-8856(88)90015-7","volume":"6","author":"H. Buxton","year":"1988","unstructured":"Buxton, H., Walker, N.: Query based visual analysis: Spatio-temporal reasoning in computer vision. Vis. Comput. 6(4), 247\u2013254 (1988)","journal-title":"Vis. Comput."},{"key":"17_CR9","unstructured":"Chen, Y., Rui, Y., Huang, T.: JPDAF based HMM for real-time contour tracking. In: Proceedings of the Conference on Computer Vision and Pattern Recognition (2001)"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Chua, T.S., Teo, K.C., Ooi, B.C., Tan, K.L.: Using domain knowledge in querying image databases. In: Proceeding of the International Conference on Multimedia Modeling (1996)","DOI":"10.1142\/2970"},{"key":"17_CR11","first-page":"309","volume":"9","author":"G. Cooper","year":"1992","unstructured":"Cooper, G., Herskovits, E.: A Bayesian method for the induction of probabilistic networks from data. Machine Learn. 9, 309\u2013347 (1992)","journal-title":"Machine Learn."},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Crowley, J., Coutaz, J., Rey, G., Reignier, P.: Perceptual components for context aware computing. 
In: Proceedings of the Ubicomp 2002 (2002)","DOI":"10.1007\/3-540-45809-3_9"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Darrell, T., Gordon, G., Harville, M., Woodfill, J.: Integrated person tracking using stereo, color, and pattern detection. In: Proceedings of the Conference on Computer Vision and Pattern Recognition (1998)","DOI":"10.1109\/CVPR.1998.698667"},{"key":"17_CR14","unstructured":"Dennett, D.: Minds, Machines, and Evolution, pp. 129\u2013151. Cambridge University Press, Cambridge (1984)"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Duygulu, P., Barnard, K., De Freitas, J., Forsyth, D.: Object recognition as machine translation: Learning a lexicon for a fixed image vocabulary. In: Proceedings of the European Conference on Computer Vision (2002)","DOI":"10.1007\/3-540-47979-1_7"},{"key":"17_CR16","doi-asserted-by":"crossref","unstructured":"Ekin, A., Tekalp, A., Mehrotra, R.: Semantic video querying using an integrated semantic-syntactic model. In: Proceeding of the International Conference on Image Processing (2002)","DOI":"10.1109\/ICIP.2002.1037979"},{"key":"17_CR17","unstructured":"Friedman, N., Koller, D.: Being Bayesian about network structure. In: Proceedings of the Conference on Uncertainty in Artificial Intelligence (2000)"},{"key":"17_CR18","unstructured":"Gl\u00f6ckner, I., Knoll, A.: Fuzzy quantifiers for processing natural-language queries in content-based multimedia. Technical Report TR97-05, Faculty of Technology, University of Bielefeld, Germany (1997)"},{"issue":"3","key":"17_CR19","doi-asserted-by":"crossref","first-page":"70","DOI":"10.1109\/5254.769887","volume":"14","author":"N. Guarino","year":"1999","unstructured":"Guarino, N., Masolo, C., Vetere, G.: Ontoseek: Content-based access to the web. IEEE Intell. Syst. 14(3), 70\u201380 (1999)","journal-title":"IEEE Intell. 
Syst."},{"key":"17_CR20","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1016\/0167-2789(90)90087-6","volume":"42","author":"S. Harnad","year":"1990","unstructured":"Harnad, S.: The symbol grounding problem. Physica D 42, 335\u2013346 (1990)","journal-title":"Physica D"},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Heckerman, D.: A tutorial on learning with Bayesian networks. In: Jordan, M. (ed.) Learning in Graphical Models. MIT Press, Massachusetts (1998)","DOI":"10.1007\/978-94-011-5014-9_11"},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Hongeng, S., Nevatia, R.: Large-scale event detection using semi-hidden markov models. In: Proceeding of the International Conference on Computer Vision (2003)","DOI":"10.1109\/ICCV.2003.1238661"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Hoogs, A., Rittscher, J., Stein, G., Schmiederer, J.: Video content annotation using visual analysis and large semantic knowledgebase. In: Proceedings of the Conference on Computer Vision and Pattern Recognition (2003)","DOI":"10.1109\/CVPR.2003.1211487"},{"issue":"2","key":"17_CR24","first-page":"179","volume":"17","author":"M. Hu","year":"1962","unstructured":"Hu, M.: Visual pattern recognition by moment invariants. IRE Trans. Inform. Theory 17(2), 179\u2013187 (1962)","journal-title":"IRE Trans. Inform. Theory"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Jaimes, A., Chang, S.: A conceptual framework for indexing visual information at multiple levels. In: IS&T SPIE Internet Imaging (2000)","DOI":"10.1117\/12.373443"},{"key":"17_CR26","unstructured":"Jaimes, A., Chang, S.F.: Integrating multiple classifiers in visual object detectors learned from user input. In: Proceedings of the Asian Conference on Computer Vision (2000)"},{"key":"17_CR27","volume-title":"An Introduction to Bayesian Networks","author":"F. Jensen","year":"1996","unstructured":"Jensen, F.: An Introduction to Bayesian Networks. 
Springer-Verlag, New York (1996)"},{"key":"17_CR28","doi-asserted-by":"crossref","unstructured":"Jordan, M. (ed.): Learning in Graphical Models. MIT Press, Massachusetts (1999)","DOI":"10.1007\/978-94-011-5014-9"},{"key":"17_CR29","unstructured":"Katz, B., Lin, J., Stauffer, C., Grimson, E.: Answering questions about moving objects in surveillance videos. In: Proceedings of the AAAI Spring Symposium on New Directions in Question Answering (2003)"},{"key":"17_CR30","unstructured":"Kohler, C.: Selecting ghosts and queues from a car trackers output using a spatio-temporal query language. In: Proceedings of the Conference on Computer Vision and Pattern Recognition (2004)"},{"key":"17_CR31","doi-asserted-by":"crossref","unstructured":"Kokar, M., Wang, J.: An example of using ontologies and symbolic information in automatic target recognition. In: Proceedings of the SPIE Sensor Fusion: Architectures, Algorithms, and Applications VI, pp. 40\u201350 (2002)","DOI":"10.1117\/12.458399"},{"key":"17_CR32","unstructured":"Kruschwitz, U.: Exploiting structure for intelligent web search. In: Proceeding of the International Conference on System Sciences. Maui, Hawaii (2001)"},{"key":"17_CR33","doi-asserted-by":"crossref","unstructured":"Lalmas, M.: Information retrieval and Dempster-Shafer's theory of evidence. In: Applications of Uncertainty Formalisms, pp. 157\u2013177. Springer, Berlin Heidelberg New York (1998)","DOI":"10.1007\/3-540-49426-X_8"},{"key":"17_CR34","doi-asserted-by":"crossref","unstructured":"Lim, J.: Learnable visual keywords for image classification. In: Proceedings of the ACM International Conference on Digital Libraries (1999)","DOI":"10.1145\/313238.313290"},{"key":"17_CR35","doi-asserted-by":"crossref","unstructured":"Mezaris, V., Kompatsiaris, I., Strintzis, M.: An ontology approach to object-based image retrieval. 
In: Proceedings of the International Conference on Image Processing (2003)","DOI":"10.1109\/ICIP.2003.1246729"},{"key":"17_CR36","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1093\/ijl\/3.4.235","volume":"3","author":"G. Miller","year":"1990","unstructured":"Miller, G., Beckwith, R., Fellbaum, C., Gross, D., Miller, K.: Introduction to Wordnet: an on-line lexical database. Int. J. Lexicogr. 3, 235\u2013244 (1990)","journal-title":"Int. J. Lexicogr."},{"key":"17_CR37","doi-asserted-by":"crossref","unstructured":"Mojsilovic, A., Gomes, J., Rogowitz, B.: Isee: Perceptual features for image library navigation. In: Proceedings of the 2002 SPIE Human Vision and Electronic Imaging (2002)","DOI":"10.1117\/12.469523"},{"key":"17_CR38","doi-asserted-by":"crossref","unstructured":"Mueller, H., Marchand-Maillet, S., Pun, T.: The truth about Corel\u2014evaluation in image retrieval. In: Proceedings of the Conference on Image and Video Retrieval, LNCS 2383, pp. 38\u201350. Springer, Berlin Heidelberg, New York (2002)","DOI":"10.1007\/3-540-45479-9_5"},{"issue":"5","key":"17_CR39","first-page":"593","volume":"22","author":"H. Mueller","year":"2001","unstructured":"Mueller, H., Mueller, W., Squire, D., Marchand-Maillet, S., Pun, T.: Performance evaluation in content-based image retrieval: Overview and proposals. Pattern Recog. Lett. 22(5), 593\u2013601 (2001)","journal-title":"Pattern Recog. Lett."},{"key":"17_CR40","unstructured":"Murphy, K.: The Bayes net toolbox for matlab. Comput. Sci. Stat. 33 (2001)"},{"key":"17_CR41","doi-asserted-by":"crossref","unstructured":"Nepal, S., Ramakrishna, M., Thom, J.: A fuzzy object query language (FOQL) for image databases. 
In: Proceedings of the International Conference on Database Systems for Advanced Applications (1999)","DOI":"10.1109\/DASFAA.1999.765743"},{"key":"17_CR42","doi-asserted-by":"crossref","unstructured":"Nevatia, R., Hobbs, J., Bolles, B.: An ontology for video event representation. In: Proceedings of the International Workshop on Detection and Recognition of Events in Video (at CVPR04) (2004)","DOI":"10.1109\/CVPR.2004.301"},{"key":"17_CR43","doi-asserted-by":"crossref","unstructured":"Nevatia, R., Zhao, T., Hongeng, S.: Hierarchical language-based representation of events in video streams. In: Proceedings of the IEEE Workshop on Event Mining (2003)","DOI":"10.1109\/CVPRW.2003.10038"},{"key":"17_CR44","doi-asserted-by":"crossref","unstructured":"Park, S., Aggarwal, J.: Event semantics in two-person interactions. In: Proceeding of the International Conference on Pattern Recognition (2004)","DOI":"10.1109\/ICPR.2004.1333745"},{"key":"17_CR45","doi-asserted-by":"crossref","unstructured":"Parsons, S., Hunter, A.: A review of uncertainty handling formalisms. In: Applications of Uncertainty Formalisms, pp. 8\u201337. Springer, Berlin Heidelberg New York (1998)","DOI":"10.1007\/3-540-49426-X_2"},{"issue":"1","key":"17_CR46","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1109\/MIS.2003.1179194","volume":"18","author":"K. Pastra","year":"2002","unstructured":"Pastra, K., Saggion, H., Wilks, Y.: Extracting relational facts for indexing and retrieval of crime-scene photographs. IEEE Intell. Syst. 18(1), 55\u201361 (2002)","journal-title":"IEEE Intell. Syst."},{"key":"17_CR47","unstructured":"Pfeffer, A., Koller, D.: Semantics and inference for recursive probability models. In: Proceedings of the AAAI'00 (2000)"},{"key":"17_CR48","unstructured":"Pfeffer, A., Koller, D., Milch, B., Takusagawa, K.: SPOOK: A system for probabilistic object-oriented knowledge representation. 
In: Proceeding of the Conference on Uncertainty in AI (1999)"},{"key":"17_CR49","unstructured":"Rodden, K.: Evaluating similarity-based visualisations as interfaces for image browsing. Ph.D. Thesis, Cambridge University Computer Laboratory (2001)"},{"key":"17_CR50","unstructured":"Rowe, N., Frew, B.: Automatic Classification of Objects in Captioned Descriptive Photographs for Retrieval, Chap. 4, pp. 65\u201379. AAAI Press, California (1997)"},{"key":"17_CR51","doi-asserted-by":"crossref","unstructured":"Roweis, S., Ghahramani, Z.: A unifying review of linear Gaussian models. Neural Comput. 11(2), 305\u2013345 (1999)","DOI":"10.1162\/089976699300016674"},{"key":"17_CR52","doi-asserted-by":"crossref","unstructured":"Roy, D.: Learning visually grounded words and syntax of natural spoken language. Evol. Commun. 4, (2001)","DOI":"10.1016\/S0885-2308(02)00024-4"},{"key":"17_CR53","unstructured":"Roy, D.: A trainable visually-grounded spoken language generation system. In: Proceedings of the International Conference of Spoken Language Processing (2002)"},{"key":"17_CR54","doi-asserted-by":"crossref","unstructured":"Sherrah, J., Gong, S.: Tracking discontinuous motion using Bayesian inference. In: Proceeding of the European Conference on Computer Vision, pp. 150\u2013166 (2000)","DOI":"10.1007\/3-540-45053-X_10"},{"key":"17_CR55","doi-asserted-by":"crossref","unstructured":"Sherrah, J., Gong, S.: Continuous global evidence-based Bayesian modality fusion for simultaneous tracking of multiple objects. In: Proceedings of the International Conference on Computer Vision (2001)","DOI":"10.1109\/ICCV.2001.937596"},{"key":"17_CR56","unstructured":"Sinclair, D.: Voronoi seeded colour image segmentation. Technical Report TR99-04, AT&T Laboratories Cambridge (1999)"},{"key":"17_CR57","doi-asserted-by":"crossref","unstructured":"Sinclair, D.: Smooth region structure: folds, domes, bowls, ridges, valleys and slopes. 
In: Proceedings of the Conference on Computer Vision and Pattern Recognition, pp. 389\u2013394 (2000)","DOI":"10.1109\/CVPR.2000.855845"},{"key":"17_CR58","unstructured":"Smith, P.: Edge-based motion segmentation. Ph.D. Thesis, Cambridge University Engineering Department (2001)"},{"issue":"2","key":"17_CR59","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1016\/S0262-8856(99)00024-4","volume":"18","author":"G. Socher","year":"2000","unstructured":"Socher, G., Sagerer, G., Perona, P.: Bayesian reasoning on qualitative descriptions from images and speech. Image Vis. Comput. 18(2), 155\u2013172 (2000)","journal-title":"Image Vis. Comput."},{"key":"17_CR60","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1007\/3-540-48222-9_7","volume":"2095","author":"M. Spengler","year":"2001","unstructured":"Spengler, M., Schiele, B.: Towards robust multi-cue integration for visual tracking. Lect. Notes Comput. Sci. 2095, 93\u2013106 (2001)","journal-title":"Lect. Notes Comput. Sci."},{"key":"17_CR61","unstructured":"Town, C.: Ontology based visual information processing. Ph.D. Thesis, University of Cambridge (2004)"},{"key":"17_CR62","doi-asserted-by":"crossref","unstructured":"Town, C.: Ontology-driven Bayesian networks for dynamic scene understanding. In: Proceedings of the International Workshop on Detection and Recognition of Events in Video (at CVPR04) (2004)","DOI":"10.1109\/CVPR.2004.413"},{"key":"17_CR63","unstructured":"Town, C., Sinclair, D.: Content based image retrieval using semantic visual categories. Technical Report MV01-211, Society for Manufacturing Engineers (2001)"},{"key":"17_CR64","doi-asserted-by":"crossref","unstructured":"Town, C., Sinclair, D.: Ontological query language for content based image retrieval. In: Proceedings of the IEEE Workshop on Content-Based Access of Image and Video Libraries, pp. 
75\u201381 (2001)","DOI":"10.1109\/IVL.2001.990859"},{"issue":"3","key":"17_CR65","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1016\/j.imavis.2003.10.002","volume":"22","author":"C. Town","year":"2004","unstructured":"Town, C., Sinclair, D.: Language-based querying of image collections on the basis of an extensible ontology. Int. J. Image Vis. Comput. 22(3), 251\u2013267 (2004)","journal-title":"Int. J. Image Vis. Comput."},{"key":"17_CR66","doi-asserted-by":"crossref","unstructured":"Tsai, W., Fu, K.: Attributed grammars\u2014a tool for combining syntactic and statistical approaches to pattern recognition. IEEE Trans. Syst. Man Cybernetics SMC-10(12) (1980)","DOI":"10.1109\/TSMC.1980.4308414"},{"key":"17_CR67","doi-asserted-by":"crossref","unstructured":"Tsotsos, J., Mylopoulos, J., Covvey, H., Zucker, S.: A framework for visual motion understanding. IEEE Trans. Pattern Anal. Mach. Intell. Special Issue on Computer Analysis of Time-Varying Imagery, 563\u2013573 (1980)","DOI":"10.1109\/TPAMI.1980.6447704"},{"key":"17_CR68","doi-asserted-by":"crossref","unstructured":"Wachsmuth, S., Socher, G., Brandt-Pook, H., Kummert, F., Sagerer, G.: Integration of vision and speech understanding using Bayesian networks. Videre J. Comput. Vis. Res. 1(4) (2000)","DOI":"10.1007\/3-540-49256-9_15"},{"key":"17_CR69","unstructured":"Wu, Y., Huang, T.: A co-inference approach to robust visual tracking. In: Proceedings of the International Conference on Computer Vision (2001)"},{"issue":"3","key":"17_CR70","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1109\/83.661186","volume":"7","author":"C. Xu","year":"1998","unstructured":"Xu, C., Prince, J.: Snakes, shapes, and gradient vector flow. IEEE Trans. Image Process. 7(3), 359\u2013369 (1998)","journal-title":"IEEE Trans. Image Process."},{"key":"17_CR71","doi-asserted-by":"crossref","unstructured":"Zhao, R., Grosky, W.: From features to semantics: Some preliminary results. 
In: Proceedings of the IEEE International Conference on Multimedia and Expo, pp. 679\u2013682 (2000)","DOI":"10.1109\/ICME.2000.871453"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-006-0017-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00138-006-0017-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-006-0017-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T00:10:55Z","timestamp":1736295055000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00138-006-0017-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006,3,14]]},"references-count":71,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2006,5]]}},"alternative-id":["17"],"URL":"https:\/\/doi.org\/10.1007\/s00138-006-0017-3","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2006,3,14]]}}}