{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T03:09:35Z","timestamp":1774321775487,"version":"3.50.1"},"reference-count":59,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2008,9,13]],"date-time":"2008-09-13T00:00:00Z","timestamp":1221264000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2008,11]]},"DOI":"10.1007\/s00530-008-0142-0","type":"journal-article","created":{"date-parts":[[2008,9,12]],"date-time":"2008-09-12T04:39:17Z","timestamp":1221194357000},"page":"299-323","source":"Crossref","is-referenced-by-count":23,"title":["COSMOROE: a cross-media relations framework for modelling multimedia dialectics"],"prefix":"10.1007","volume":"14","author":[{"given":"Katerina","family":"Pastra","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2008,9,13]]},"reference":[{"key":"142_CR1","unstructured":"Andr\u00e9, E., Rist, T.: The design of illustrated documents as a planning task. In: Maybury, M. (ed.) Intelligent Multimedia Interfaces, pp. 94\u2013116, Chap. 4. AAAI Press\/MIT Press, Cambridge, MA (1993)"},{"key":"142_CR2","doi-asserted-by":"crossref","unstructured":"Andr\u00e9, E., Rist, T.: Referring to world objects with text and pictures. In: Proceedings of the Computational Linguistics Conference, pp. 530\u2013534 (1994)","DOI":"10.3115\/991886.991978"},{"key":"142_CR3","first-page":"1107","volume":"3","author":"K. Barnard","year":"2003","unstructured":"Barnard K., Duygulu P., Forsyth D., Freitas N., Blei D., Jordan M.: Matching words and pictures. J. Mach. Learn. Res. 3, 1107\u20131135 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"142_CR4","unstructured":"Barras, C., Geoffrois, E., Wu, Z., Liberman, M.: Transcriber: a free tool for segmenting, labeling and transcribing speech. In: Proceedings of the First International Conference on Language Resources and Evaluation, pp. 1373\u20131376 (1998)"},{"key":"142_CR5","unstructured":"Barthes, R.: Image, Music, Text. Flamingo (1984)"},{"key":"142_CR6","unstructured":"Bateman, J., Delin, J., Allen, P.: Constraints on layout in multimodal document generation. In: Proceedings of the Workshop on Coherence in Generated Multimedia, First International Natural Language Generation Conference (2000)"},{"key":"142_CR7","doi-asserted-by":"crossref","unstructured":"Bateman, J., Delin, J., Henschel, R.: Multimodality and empiricism: preparing for a corpus-based approach to the study of multimodal meaning-making. In: Perspectives on Multimodality, pp. 65\u201389. John Benjamins, Amsterdam (2004)","DOI":"10.1075\/ddcs.6.06bat"},{"key":"142_CR8","doi-asserted-by":"crossref","unstructured":"Bernsen N.: Why are analogue graphics and natural language both needed in hci? In: Paterno, F. (ed.) Interactive Systems: Design, specification and verification. Focus on Computer Graphics, pp. 235\u2013251. Springer, Berlin (1995)","DOI":"10.1007\/978-3-642-87115-3_14"},{"issue":"6\/7","key":"142_CR9","doi-asserted-by":"crossref","first-page":"477","DOI":"10.1016\/S0920-5489(97)00013-5","volume":"18","author":"M. Bordegoni","year":"1997","unstructured":"Bordegoni M., Faconti G., Feiner S., Maybury M., Rist T., Ruggieri S., Trahanias P., Wilson M.: A standard reference model for intelligent multimedia presentation systems. Computer Standards Interfaces 18(6\/7), 477\u2013496 (1997)","journal-title":"Computer Standards Interfaces"},{"issue":"2","key":"142_CR10","first-page":"249","volume":"22","author":"J. Carletta","year":"1996","unstructured":"Carletta J.: Assessing agreement on classification tasks: the kappa statistic. Comput. Linguist. 22(2), 249\u2013254 (1996)","journal-title":"Comput. Linguist."},{"key":"142_CR11","doi-asserted-by":"crossref","unstructured":"Carlson, L., Marcu, D., Okurowski, M.: Building a discourse-tagged corpus in the framework of rhetorical structure theory. In: Current Directions in Discourse and Dialogue, pp. 85\u2013112. Kluwer, Dordrecht (2003)","DOI":"10.1007\/978-94-010-0019-2_5"},{"key":"142_CR12","unstructured":"de Carolis, B., Pelachaud, C., Poggi, I.: Verbal and nonverbal discourse planning, proceedings of fourth international conference on autonomous agents. In: Proceedings of the Workshop on Achieving Human-Like Behaviour in Interactive Animated Agents, Fourth International Conference on Autonomous Agents (2000)"},{"key":"142_CR13","doi-asserted-by":"crossref","unstructured":"Cassell, J.: A framework for gesture generation and interpretation. In: Computer Vision in Human\u2013Machine Interaction, Chap. 11. Cambridge University Press, London (1998)","DOI":"10.1017\/CBO9780511569937.013"},{"key":"142_CR14","unstructured":"Chen, L., Liu, Y., Harper, M., Maia, E., McRoy, S.: Evaluating factors impacting the accuracy of forced alignments in a multimodal corpus. In: Proceedings of the 4th Language Resources and Evaluation Conference (2004)"},{"key":"142_CR15","unstructured":"Corio, M., Lapalme, G.: Integrated generation of graphics and text: a corpus study. In: Proceedings of the Association of Computational Linguistics Workshop on Content Visualisation and Intermedia Representation, pp. 63\u201368 (1998)"},{"key":"142_CR16","unstructured":"Corio, M., Lapalme, G.: Generation of texts for information graphics. In: Proceedings of the European Workshop on Natural Languge Generation, pp. 49\u201358 (1999)"},{"key":"142_CR17","doi-asserted-by":"crossref","first-page":"1391","DOI":"10.2214\/ajr.184.5.01841391","volume":"184","author":"P. Crewson","year":"2005","unstructured":"Crewson P.: Fundamental of clinical research for radiologists: reader agreement studies. Am. J. Roentgenol. 184, 1391\u20131397 (2005)","journal-title":"Am. J. Roentgenol."},{"key":"142_CR18","doi-asserted-by":"crossref","unstructured":"Dasiopoulou, S., Papastathis, V., Mezaris, V., Kompatsiaris, I., Strintzis, M.: An ontology framework for knowledge-assisted semantic video analysis and annotation. In: Proceedings of the International Workshop on Knowledge Markup and Semantic Annotation (2004)","DOI":"10.1109\/TCSVT.2005.854238"},{"key":"142_CR19","unstructured":"Everingham, M., Gool, L.V., Williams, C., Zisserman, A.: Pascal visual object classes challenge results. World Wide Web ( http:\/\/www.pascal-network.org\/challenges\/VOC\/voc ) (2005)"},{"key":"142_CR20","doi-asserted-by":"crossref","unstructured":"Fasciano, M., Lapalme, G.: Intentions in the co-ordinated generation of graphics and text from tabular data. Knowl. Inform. Syst. 2(3) (2000)","DOI":"10.1007\/PL00011645"},{"key":"142_CR21","unstructured":"Feiner, S., McKeown, K.: Automating the generation of co-ordinated multimedia explanations. In: Maybury, M. (ed.) Intelligent Multimedia Interfaces, pp. 117\u2013138, chap. 5. AAAI Press\/MIT Press, Cambridge, MA (1993)"},{"key":"142_CR22","volume-title":"WordNet: An Electronic Lexical Database","year":"1998","unstructured":"Fellbaum,C. (ed.):WordNet:An Electronic Lexical Database. The MIT Press, Cambridge, MA (1998)"},{"key":"142_CR23","doi-asserted-by":"crossref","unstructured":"Green, N.: An empirical study of multimedia argumentation. In: Proceedings of the International Conference on Computational Sciences-Part I, pp. 1009\u20131018. Springer, Berlin (2001)","DOI":"10.1007\/3-540-45545-0_113"},{"key":"142_CR24","unstructured":"Gut, U., Looks, K., Thies, A., Trippel, T., Gibbon, D.: Cogest conversational gesture transcription system. Tech. rep., University of Bielefeld (2002)"},{"key":"142_CR25","volume-title":"Consciousness and the Computational Mind","author":"R. Jackendoff","year":"1987","unstructured":"Jackendoff R.: Consciousness and the Computational Mind. MIT Press, Cambridge (1987)"},{"key":"142_CR26","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511807572","volume-title":"Gesture: Visible Action as Utterance","author":"A. Kendon","year":"2004","unstructured":"Kendon A.: Gesture: Visible Action as Utterance. Cambridge University Press, London (2004)"},{"key":"142_CR27","unstructured":"Kipp, M.: Gesture generation by imitation\u2014from human behavior to computer character animation. Boca Raton, Florida: Dissertation.com (2004)"},{"key":"142_CR28","unstructured":"Kipp, M.: Spatiotemporal coding in anvil. In: Proceedings of the 6th Language Resources and Evaluation Conference (2008)"},{"key":"142_CR29","unstructured":"Lin, C., Tseng, B., Smith, J.: Video collaborative annotation forum: Establishing ground-truth labels on large multimedia datasets. TRECVID Proceedings (2003)"},{"key":"142_CR30","unstructured":"Lindley, C., Davis, J., Nack, F., Rutledge, L.: The application of rhetorical structure theory to interactive news program generation from digital archives. Technical Report INS-R0101, Centrum voor Wiskunde en Informatica (2001)"},{"key":"142_CR31","unstructured":"Magno-Caldognetto, E., Poggio, I., Cosi, P., Cavicchio, F., Merola, G.: Multimedia score\u2014an anvil-based annotation scheme for multimodal audio-video analysis. In: Proceedings of the LREC Workshop on Multimodal Corpora: Models of Human Behaviour for the Specification and Evaluation Of Multimodal Input And Output Interfaces, pp. 29\u201333 (2004)"},{"key":"142_CR32","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1007\/978-94-009-3645-4_7","volume-title":"Natural Language Generation: New results in Artificial Intelligence, Psychology and Linguistics","author":"W. Mann","year":"1987","unstructured":"Mann W., Thompson S.: Rhetorical structure theory: description and construction of text structures. In: Kempen, G.(eds) Natural Language Generation: New results in Artificial Intelligence, Psychology and Linguistics, pp. 85\u201395. Nijhoff, Dodrecht (1987)"},{"issue":"6","key":"142_CR33","doi-asserted-by":"crossref","first-page":"647","DOI":"10.1108\/00220410310506303","volume":"59","author":"E. Marsh","year":"2003","unstructured":"Marsh E., Domas-White M.: A taxonomy of relationships between image and text. J. Document. 59(6), 647\u2013672 (2003)","journal-title":"J. Document."},{"key":"142_CR34","unstructured":"Martin, J., Grimard, S., Alexandri, K.: On the annotation of multimodal behavior and computation of cooperation between modalities. In: Proceedings of the International Conference on Autonomous Agents workshop on Representing, Annotating, Evaluating Non-verbal and Verbal Communicative Acts to Achieve Contextual Embodied Agents, pp. 1\u20137 (2001)"},{"key":"142_CR35","unstructured":"Martin, J., Julia, L., Cheyer, A.: A theoretical framework for multimodal user studies. In: Proceedings of the Second International Conference on Cooperative Multimodal Communication, pp. 104\u2013110 (1998)"},{"key":"142_CR36","unstructured":"Martin, J., Kipp, M.: Annotating and measuring multimodal behaviour\u2014tycoon metrics in the anvil tool. In: Proceedings of the Language Resources and Evaluation Conference 2002, pp. 31\u201335 (2002)"},{"issue":"3","key":"142_CR37","doi-asserted-by":"crossref","first-page":"339","DOI":"10.1177\/1470357205055928","volume":"4","author":"R. Martinec","year":"2005","unstructured":"Martinec R., Salway A.: A system for image\u2013text relations in new (and old) media. Vis. Commun. 4(3), 339\u2013374 (2005)","journal-title":"Vis. Commun."},{"key":"142_CR38","volume-title":"Intelligent Multimedia Interfaces","year":"1993","unstructured":"Maybury, M. (ed.): Intelligent Multimedia Interfaces. AAAI Press\/MIT Press, Cambridge, MA (1993)"},{"key":"142_CR39","volume-title":"Intelligent User Interfaces","year":"1998","unstructured":"Maybury, M.,Wahlster,W. (eds.): Intelligent User Interfaces. Morgan Kaufmann Publishers, San Francisco, CA (1998)"},{"key":"142_CR40","doi-asserted-by":"crossref","DOI":"10.7208\/chicago\/9780226514642.001.0001","volume-title":"Gesture and Thought","author":"D. McNeil","year":"2005","unstructured":"McNeil D.: Gesture and Thought. The University of Chicago Press, Chicago, IL (2005)"},{"key":"142_CR41","unstructured":"Minsky, M.: The Society of Mind. Simon and Schuster Inc., NY, USA (1986)"},{"issue":"4","key":"142_CR42","first-page":"651","volume":"19","author":"J. Moore","year":"1993","unstructured":"Moore J., Paris C.: Planning text for advisory dialogues: capturing intentional and rhetorical information. Comput. Linguist. 19(4), 651\u2013695 (1993)","journal-title":"Comput. Linguist."},{"issue":"4","key":"142_CR43","first-page":"537","volume":"18","author":"J. Moore","year":"1992","unstructured":"Moore J., Pollack M.: Problem for RST: the need for multi-level discourse analysis. Comput. Linguist. 18(4), 537\u2013544 (1992)","journal-title":"Comput. Linguist."},{"key":"142_CR44","unstructured":"Nicholas, N.: Parameters for rhetorical structure theory ontology. In: University of Melbourne Working Papers in Linguistics, vol. 15, pp. 77\u201393. University of Melbourne, Melbourne (1995)"},{"key":"142_CR45","unstructured":"Pastra, K.: The language of caricature: language and drawing interaction. Final year project, Department of Greek Philology and Linguistics, University of Athens (1999) (in Greek)"},{"key":"142_CR46","unstructured":"Pastra, K.: Viewing vision\u2013language integration as a double-grounding case. In: Proceedings of the AAAI Fall Symposium on Achieving Human-Level Intelligence through Integrated Systems and Research, pp. 62\u201367 (2004)"},{"key":"142_CR47","unstructured":"Pastra, K.: Vision\u2013language integration: a double-grounding case. Ph.D. thesis, University of Sheffield (2005)"},{"key":"142_CR48","unstructured":"Pastra, K.: Beyond multimedia integration: corpora and annotations for cross-media decision mechanisms. In: Proceedings of the 5th Language Resources and Evaluation Conference, pp. 499\u2013504 (2006)"},{"key":"142_CR49","unstructured":"Pastra, K., Piperidis, S.: Video search: new challenges in the pervasive digital video era. J. Virtual Reality Broadcast. 3(11) (2006)"},{"issue":"1","key":"142_CR50","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1109\/MIS.2003.1179194","volume":"18","author":"K. Pastra","year":"2003","unstructured":"Pastra K., Saggion H., Wilks Y.: Intelligent indexing of crime-scene photographs. IEEE Intell. Syst. 18(1), 55\u201361 (2003)","journal-title":"IEEE Intell. Syst."},{"key":"142_CR51","unstructured":"Pastra, K., Wilks, Y.: Vision\u2013language integration in AI: a reality check. In: Proceedings of the 16th European Conference in Artificial Intelligence, pp. 937\u2013941 (2004)"},{"key":"142_CR52","doi-asserted-by":"crossref","unstructured":"Radev, D.: A common theory of information fusion from multiple text sources. step one: cross document structure. In: Proceedings of the 1st SIGdial Workshop on Discourse and Dialogue, pp. 74\u201383 (2000)","DOI":"10.3115\/1117736.1117745"},{"key":"142_CR53","unstructured":"Rocchi, C., Zancanaro, M.: Generation of video documentaries from discourse structures. In: Proceedings of the 9th European Workshop on Natural Language Generation (EWNLG 9) (2003)"},{"key":"142_CR54","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1080\/01638539209544800","volume":"15","author":"T. Sanders","year":"1992","unstructured":"Sanders T., Spooren W., Noordman L.: Toward a taxonomy of coherence relations. Discourse Process. 15, 1\u201335 (1992)","journal-title":"Discourse Process."},{"key":"142_CR55","unstructured":"Simou, N., Tzouvaras, V., Avrithis, Y., Stamou, G., Kollias, S.: A visual descriptor ontology for multimedia reasoning. In: Proceedings of the workshop on Image Analysis for Multimedia Interactive Services (WIAMIS) (2005)"},{"key":"142_CR56","doi-asserted-by":"crossref","unstructured":"Srikanth, M., Varner, J., Bowden, M., Moldovan, D.: Exploiting ontologies for authomatic image annotation. In: Proceedings of the ACM Special Interest Group in Information Retrieval (SIGIR), pp. 552\u2013558 (2005)","DOI":"10.1145\/1076034.1076128"},{"issue":"3","key":"142_CR57","doi-asserted-by":"crossref","first-page":"423","DOI":"10.1177\/1461445606061881","volume":"8","author":"M. Taboada","year":"2006","unstructured":"Taboada M., Mann W.: Rhetorical structure theory: looking back and moving ahead. Discourse Stud. 8(3), 423\u2013459 (2006)","journal-title":"Discourse Stud."},{"key":"142_CR58","doi-asserted-by":"crossref","unstructured":"Wachsmuth, S., Stevenson, S., Dickinson, S.: Towards a framework for learning structured shape models from text-annotated images. In: Proceedings of the HLT-NAACL Workshop on Learning Word Meaning from non-linguistic Data (2003)","DOI":"10.3115\/1119212.1119216"},{"key":"142_CR59","unstructured":"Whittaker, S., Walker, M.: Toward a theory of multi-modal interaction. In: Proceedings of the National Conference on Artificial Intelligence Workshop on Multi-modal Interaction (1991)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-008-0142-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00530-008-0142-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-008-0142-0","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,28]],"date-time":"2019-05-28T22:25:28Z","timestamp":1559082328000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00530-008-0142-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008,9,13]]},"references-count":59,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2008,11]]}},"alternative-id":["142"],"URL":"https:\/\/doi.org\/10.1007\/s00530-008-0142-0","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2008,9,13]]}}}