{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T12:59:59Z","timestamp":1770814799493,"version":"3.50.1"},"publisher-location":"Cham","reference-count":223,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031610653","type":"print"},{"value":"9783031610660","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-61066-0_13","type":"book-chapter","created":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T04:12:49Z","timestamp":1717215169000},"page":"182-234","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["An Outlook for\u00a0AI Innovation in\u00a0Multimodal Communication 
Research"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2611-1417","authenticated-orcid":false,"given":"Alexander","family":"Henlein","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4630-4590","authenticated-orcid":false,"given":"Anastasia","family":"Bauer","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0720-4973","authenticated-orcid":false,"given":"Reetu","family":"Bhattacharjee","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1513-0188","authenticated-orcid":false,"given":"Aleksandra","family":"\u0106wiek","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0143-2000","authenticated-orcid":false,"given":"Alina","family":"Gregori","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8101-0005","authenticated-orcid":false,"given":"Frank","family":"K\u00fcgler","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3661-4752","authenticated-orcid":false,"given":"Jens","family":"Lemanski","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5070-2233","authenticated-orcid":false,"given":"Andy","family":"L\u00fccking","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2567-7539","authenticated-orcid":false,"given":"Alexander","family":"Mehler","sequence":"additional","affiliation":[]},{"given":"Pilar","family":"Prieto","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3394-1013","authenticated-orcid":false,"given":"Paula 
G.","family":"S\u00e1nchez-Ram\u00f3n","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1271-2526","authenticated-orcid":false,"given":"Job","family":"Schepens","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7198-9923","authenticated-orcid":false,"given":"Martin","family":"Schulte-R\u00fcther","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5762-0188","authenticated-orcid":false,"given":"Stefan R.","family":"Schweinberger","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9662-0751","authenticated-orcid":false,"given":"Celina I.","family":"von Eiff","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,6,1]]},"reference":[{"key":"13_CR1","unstructured":"Aguirre-Celis, N., Miikkulainen, R.: Understanding the semantic space: how word meanings dynamically adapt in the context of a sentence. In: Proceedings of the 2021 Workshop on Semantic Spaces at the Intersection of NLP, Physics, and Cognitive Science (SemSpace), pp. 1\u201311 (2021)"},{"key":"13_CR2","doi-asserted-by":"crossref","unstructured":"Aiyappa, R., An, J., Kwak, H., Ahn, Y.Y.: Can we trust the evaluation on chatgpt? arXiv preprint arXiv:2303.12767 (2023)","DOI":"10.18653\/v1\/2023.trustnlp-1.5"},{"key":"13_CR3","first-page":"24206","volume":"34","author":"H Akbari","year":"2021","unstructured":"Akbari, H., et al.: VATT: transformers for multimodal self-supervised learning from raw video, audio and text. Adv. Neural. Inf. Process. Syst. 34, 24206\u201324221 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Alkaissi, H., McFarlane, S.I.: Artificial hallucinations in chatgpt: implications in scientific writing. 
Cureus 15(2) (2023)","DOI":"10.7759\/cureus.35179"},{"key":"13_CR5","first-page":"10755","volume":"2023","author":"E Almazrouei","year":"2023","unstructured":"Almazrouei, E., et al.: Falcon-40B: an open large language model with state-of-the-art performance. Find. Assoc. Comput. Linguist. ACL 2023, 10755\u201310773 (2023)","journal-title":"Find. Assoc. Comput. Linguist. ACL"},{"key":"13_CR6","doi-asserted-by":"publisher","unstructured":"Ambrazaitis, G., House, D.: The multimodal nature of prominence: some directions for the study of the relation between gestures and pitch accents. In: Proceedings of the 13th International Conference of Nordic Prosody, pp. 262\u2013273 (2023). https:\/\/doi.org\/10.2478\/9788366675728-024","DOI":"10.2478\/9788366675728-024"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"American Psychiatric Association: Diagnostic and statistical manual of mental disorders, 5th edn. Technical report, American Psychiatric Association, Arlington (2013)","DOI":"10.1176\/appi.books.9780890425596"},{"key":"13_CR8","doi-asserted-by":"publisher","unstructured":"Amici, F., Liebal, K.: Testing hypotheses for the emergence of gestural communication in great and small apes (pan troglodytes, pongo abelii, Symphalangus syndactylus). Int. J. Primatol. (2022). https:\/\/doi.org\/10.1007\/s10764-022-00342-7","DOI":"10.1007\/s10764-022-00342-7"},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"Andonova, E., Taylor, H.A.: Nodding in dis\/agreement: a tale of two cultures. Cogn. Process. 13(S1), 79\u201382 (2012). https:\/\/doi.org\/10.1007\/s10339-012-0472-x","DOI":"10.1007\/s10339-012-0472-x"},{"key":"13_CR10","doi-asserted-by":"publisher","unstructured":"Anger, C., Berwe, T., Olszok, A., Reichenberger, A., Lemanski, J.: Five dogmas of logic diagrams and how to escape them. Lang. Commun. 87, 258\u2013270 (2022). https:\/\/doi.org\/10.1016\/j.langcom.2022.09.001. 
https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0271530922000775","DOI":"10.1016\/j.langcom.2022.09.001"},{"key":"13_CR11","unstructured":"Anil, R., et al.: PaLM 2 technical report. arXiv:2305.10403 (2023)"},{"key":"13_CR12","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1023\/A:1024716331692","volume":"20","author":"D Archer","year":"1997","unstructured":"Archer, D.: Unspoken diversity: cultural differences in gestures. Qual. Sociol. 20, 79\u2013105 (1997)","journal-title":"Qual. Sociol."},{"issue":"2","key":"13_CR13","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1007\/BF00231718","volume":"103","author":"AS Aruin","year":"1995","unstructured":"Aruin, A.S., Latash, M.L.: Directional specificity of postural muscles in feed-forward postural reactions during fast voluntary arm movements. Exp. Brain Res. 103(2), 323\u2013332 (1995). https:\/\/doi.org\/10.1007\/BF00231718","journal-title":"Exp. Brain Res."},{"issue":"1","key":"13_CR14","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1075\/gest.6.1.05ban","volume":"6","author":"A Bangerter","year":"2006","unstructured":"Bangerter, A., Oppenheimer, D.M.: Accuracy in detecting referents of pointing gestures unaccompanied by language. Gesture 6(1), 85\u2013102 (2006)","journal-title":"Gesture"},{"issue":"1","key":"13_CR15","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1111\/lnc3.12170","volume":"10","author":"M Baroni","year":"2016","unstructured":"Baroni, M.: Grounding distributional semantics in the visual world. Lang. Linguist. Compass 10(1), 3\u201313 (2016)","journal-title":"Lang. Linguist. Compass"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Barwise, J., Etchemendy, J.: Chapter VIII heterogeneous logic. In: Logical Reasoning with Diagrams, pp. 179\u2013200 (1996)","DOI":"10.1093\/oso\/9780195104271.003.0014"},{"key":"13_CR17","doi-asserted-by":"publisher","unstructured":"Bauer, A.: Russian multimodal conversational data (2023). 
https:\/\/doi.org\/10.18716\/DCH\/A.00000016. https:\/\/dch.phil-fak.uni-koeln.de\/bestaende\/datensicherung\/russian-multimodal-conversational-data","DOI":"10.18716\/DCH\/A.00000016"},{"key":"13_CR18","doi-asserted-by":"publisher","unstructured":"Bauer, A., Poryadin, R.: Russian sign language conversations (2023). https:\/\/dch.phil-fak.uni-koeln.de\/bestaende\/datensicherung\/russian-sign-language-conversations. https:\/\/doi.org\/10.18716\/DCH\/A.00000028","DOI":"10.18716\/DCH\/A.00000028"},{"issue":"6","key":"13_CR19","doi-asserted-by":"publisher","first-page":"941","DOI":"10.1037\/0022-3514.79.6.941","volume":"79","author":"JB Bavelas","year":"2000","unstructured":"Bavelas, J.B., Coates, L., Johnson, T.: Listeners as co-narrators. J. Pers. Soc. Psychol. 79(6), 941\u2013952 (2000). https:\/\/doi.org\/10.1037\/0022-3514.79.6.941","journal-title":"J. Pers. Soc. Psychol."},{"key":"13_CR20","unstructured":"Beckman, M.E., Ayers-Elam, G.: Guidelines for ToBI Labelling: Version 3. Ohio State University (1997). http:\/\/www.ling.ohio-state.edu\/~tobi\/ame_tobi\/labelling_guide_v3.pdf"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Bellucci, F., Pietarinen, A.V.: Two dogmas of diagrammatic reasoning: a view from existential graphs. In: Peirce on Perception and Reasoning: From icons to logic, pp. 174\u2013195. Routledge (2017)","DOI":"10.4324\/9781315444642-13"},{"key":"13_CR22","unstructured":"Bertsch, A., Alon, U., Neubig, G., Gormley, M.R.: Unlimiformer: long-range transformers with unlimited length input. arXiv preprint arXiv:2305.01625 (2023)"},{"key":"13_CR23","unstructured":"Betker, J., et al.: Improving image generation with better captions. Comput. Sci. 2(3), 8 (2023). 
https:\/\/cdnopenai.com\/papers\/dall-e-3.pdf"},{"issue":"3","key":"13_CR24","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1007\/s11787-019-00227-z","volume":"13","author":"R Bhattacharjee","year":"2019","unstructured":"Bhattacharjee, R., Chakraborty, M.K., Choudhury, L.: Venn$$_{i_{o_{1}}}$$: a diagram system for universe without boundary. Logica Univers. 13(3), 289\u2013346 (2019). https:\/\/doi.org\/10.1007\/s11787-019-00227-z","journal-title":"Logica Univers."},{"key":"13_CR25","doi-asserted-by":"publisher","unstructured":"Bhattacharjee, R., Moktefi, A.: Revisiting peirce\u2019s rules of transformation for euler-venn diagrams. In: Basu, A., Stapleton, G., Linker, S., Legg, C., Manalo, E., Viana, P. (eds.) Diagrammatic Representation and Inference. LNCS, vol. 12909, pp. 166\u2013182. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86062-2_14","DOI":"10.1007\/978-3-030-86062-2_14"},{"key":"13_CR26","unstructured":"Boersma, P., Weenink, D.: Praat: doing phonetics by computer. www.praat.org\/"},{"key":"13_CR27","unstructured":"Bolly, C.: CorpAGEst Annotation Manual. (II. Speech Annotation Guidelines) (2016)"},{"key":"13_CR28","doi-asserted-by":"publisher","unstructured":"Bolt, R.A.: \u201cput-that-there\u201d: voice and gesture at the graphics interface. SIGGRAPH Comput. Graph. 14, 262\u2013270 (1980). https:\/\/doi.org\/10.1145\/965105.807503","DOI":"10.1145\/965105.807503"},{"issue":"1","key":"13_CR29","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1080\/0907676X.2013.876057","volume":"23","author":"M Borodo","year":"2015","unstructured":"Borodo, M.: Multimodality, translation and comics. Perspectives 23(1), 22\u201341 (2015)","journal-title":"Perspectives"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Braunschweiler, N.: The Prosodizer \u2013 automatic prosodic annotations of speech synthesis databases. 
In: Proceedings of Speech Prosody, vol.\u00a02006 (2006)","DOI":"10.21437\/SpeechProsody.2006-136"},{"key":"13_CR31","doi-asserted-by":"publisher","unstructured":"Bressem, J., Ladewig, S.H., M\u00fcller, C.: Linguistic annotation system for gestures. In: M\u00fcller, C., Cienki, A., Fricke, E., Ladewig, S., McNeill, D., Te\u00dfendorf, S. (eds.) Body \u2013 Language \u2013 Communication. An International Handbook on Multimodality in Human Interaction, Handb\u00fccher zur Sprach- und Kommunikationswissenschaft\/Handbooks of Linguistics and Communication Science (HSK) 38\/1, vol.\u00a01, chap.\u00a071, pp. 1098\u20131124. De Gruyter Mouton, Berlin and Boston (2013). https:\/\/doi.org\/10.1515\/9783110261318.1098","DOI":"10.1515\/9783110261318.1098"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Brown, L., Prieto, P.: Gesture and prosody in multimodal communication. In: Haugh, M., K\u00e1d\u00e1r, D.Z., Terkourafi, M. (eds.) The Cambridge Handbook of Sociopragmatics, chap.\u00a021, pp. 430\u2013453. Cambridge University Press, Cambridge (2021)","DOI":"10.1017\/9781108954105.023"},{"key":"13_CR33","doi-asserted-by":"crossref","unstructured":"Bulat, L., Clark, S., Shutova, E.: Speaking, seeing, understanding: correlating semantic models with conceptual representation in the brain. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pp. 1081\u20131091 (2017)","DOI":"10.18653\/v1\/D17-1113"},{"key":"13_CR34","unstructured":"Cao, Y., et al.: A comprehensive survey of AI-generated content (AIGC): a history of generative AI from GAN to chatgpt. arXiv preprint arXiv:2303.04226 (2023)"},{"key":"13_CR35","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2929257","author":"Z Cao","year":"2019","unstructured":"Cao, Z., Hidalgo Martinez, G., Simon, T., Wei, S., Sheikh, Y.A.: OpenPose: realtime multi-person 2D pose estimation using part affinity fields. IEEE Trans. Pattern Anal. Mach. Intell. (2019). 
https:\/\/doi.org\/10.1109\/TPAMI.2019.2929257","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"13_CR36","doi-asserted-by":"crossref","unstructured":"Chen, J., Ho, C.M.: MM-VIT: multi-modal video transformer for compressed video action recognition. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 1910\u20131921 (2022)","DOI":"10.1109\/WACV51458.2022.00086"},{"key":"13_CR37","unstructured":"Chu, J., Liu, Y., Yang, Z., Shen, X., Backes, M., Zhang, Y.: Comprehensive assessment of jailbreak attacks against LLMS. arXiv preprint arXiv:2402.05668 (2024)"},{"key":"13_CR38","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511620539","volume-title":"Using Language","author":"HH Clark","year":"1996","unstructured":"Clark, H.H.: Using Language. Cambridge University Press, Cambridge (1996)"},{"key":"13_CR39","doi-asserted-by":"publisher","unstructured":"Cowen, A.S., Keltner, D.: What the face displays: mapping 28 emotions conveyed by naturalistic expression. Am. Psychol. 75(3), 349\u2013364 (2020). https:\/\/doi.org\/10.1037\/amp0000488","DOI":"10.1037\/amp0000488"},{"key":"13_CR40","doi-asserted-by":"publisher","unstructured":"Davidson, D.: Three varieties of knowledge. Roy. Inst. Philos. Suppl. 30, 153\u2013166 (1991). https:\/\/doi.org\/10.1017\/S1358246100007748","DOI":"10.1017\/S1358246100007748"},{"issue":"2","key":"13_CR41","doi-asserted-by":"publisher","first-page":"385","DOI":"10.3758\/BRM.41.2.385","volume":"41","author":"NH De Jong","year":"2009","unstructured":"De Jong, N.H., Wempe, T.: Praat script to detect syllable nuclei and measure speech rate automatically. Behav. Res. Methods 41(2), 385\u2013390 (2009)","journal-title":"Behav. Res. Methods"},{"key":"13_CR42","doi-asserted-by":"publisher","unstructured":"Demey, L., Smessaert, H.: A database of aristotelian diagrams: empirical foundations for logical geometry. In: Giardino, V., Linker, S., Burns, R., Bellucci, F., Boucheix, J.M., Viana, P. 
(eds.) Theory and Application of Diagrams, pp. 123\u2013131. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-15146-0_10","DOI":"10.1007\/978-3-031-15146-0_10"},{"issue":"4","key":"13_CR43","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1515\/COG.2006.015","volume":"17","author":"H Diessel","year":"2006","unstructured":"Diessel, H.: Demonstratives, joint attention, and the emergence of grammar. Cogn. Linguist. 17(4), 463\u2013489 (2006). https:\/\/doi.org\/10.1515\/COG.2006.015","journal-title":"Cogn. Linguist."},{"key":"13_CR44","doi-asserted-by":"publisher","unstructured":"Dingemanse, M., Enfield, N.J.: Other-initiated repair across languages: towards a typology of conversational structures. Open Linguist. 1(1) (2015). https:\/\/doi.org\/10.2478\/opli-2014-0007. https:\/\/www.degruyter.com\/doi\/10.2478\/opli-2014-0007","DOI":"10.2478\/opli-2014-0007"},{"issue":"19","key":"13_CR45","doi-asserted-by":"publisher","first-page":"7561","DOI":"10.3390\/s22197561","volume":"22","author":"C Dogdu","year":"2022","unstructured":"Dogdu, C., Kessler, T., Schneider, D., Shadaydeh, M., Schweinberger, S.R.: A comparison of machine learning algorithms and feature sets for automatic vocal emotion recognition in speech. Sensors 22(19), 7561 (2022)","journal-title":"Sensors"},{"key":"13_CR46","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1016\/j.anbehav.2023.03.020","volume":"201","author":"E Doherty","year":"2023","unstructured":"Doherty, E., Davila Ross, M., Clay, Z.: Multimodal communication development in semi-wild chimpanzees. Anim. Behav. 201, 175\u2013190 (2023)","journal-title":"Anim. Behav."},{"key":"13_CR47","doi-asserted-by":"crossref","unstructured":"Dong, L., Xu, S., Xu, B.: Speech-transformer: a no-recurrence sequence-to-sequence model for speech recognition. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5884\u20135888. 
IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8462506"},{"key":"13_CR48","doi-asserted-by":"publisher","unstructured":"Douglas, P.H., Moscovice, L.R.: Pointing and pantomime in wild apes? Female bonobos use referential and iconic gestures to request genito-genital rubbing. Sci. Rep. 5(1) (2015). https:\/\/doi.org\/10.1038\/srep13999","DOI":"10.1038\/srep13999"},{"key":"13_CR49","doi-asserted-by":"publisher","unstructured":"D\u00fcking, P., Sperlich, B., Voigt, L., Van\u00a0Hooren, B., Zanini, M., Zinner, C.: ChatGPT generated training plans for runners are not rated optimal by coaching experts, but increase in quality with additional input information. J. Sports Sci. Med. 23, 56\u201372 (2024). https:\/\/doi.org\/10.52082\/jssm.2024.56","DOI":"10.52082\/jssm.2024.56"},{"key":"13_CR50","doi-asserted-by":"crossref","unstructured":"von Eiff, C.I., Fr\u00fchholz, S., Korth, D., Guntinas-Lichius, O., Schweinberger, S.R.: Crossmodal benefits to vocal emotion perception in cochlear implant users. iScience 25(12) (2022)","DOI":"10.1016\/j.isci.2022.105711"},{"issue":"4","key":"13_CR51","doi-asserted-by":"publisher","first-page":"1178","DOI":"10.1097\/AUD.0000000000001181","volume":"43","author":"CI von Eiff","year":"2022","unstructured":"von Eiff, C.I., et al.: Parameter-specific morphing reveals contributions of timbre to the perception of vocal emotions in cochlear implant users. Ear Hear. 43(4), 1178 (2022)","journal-title":"Ear Hear."},{"issue":"1","key":"13_CR52","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1515\/semi.1969.1.1.49","volume":"1","author":"P Ekman","year":"1969","unstructured":"Ekman, P., Friesen, W.V.: The repertoire of nonverbal behavior: categories, origins, usage, and coding. Semiotica 1(1), 49\u201398 (1969)","journal-title":"Semiotica"},{"key":"13_CR53","unstructured":"Engelen, J., Bernareggi, C.: Ascience: a thematic network on access to scientific university courses by visually impaired students. 
In: Challenges for Assistive Technology, vol.\u00a020, pp. 304\u2013309. IOS Press (2007). https:\/\/lirias.kuleuven.be\/56044"},{"key":"13_CR54","doi-asserted-by":"crossref","unstructured":"Engelhardt, Y., Richards, C.: A framework for analyzing and designing diagrams and graphics. In: Diagrams (2018). https:\/\/api.semanticscholar.org\/CorpusID:49189675","DOI":"10.1007\/978-3-319-91376-6_20"},{"issue":"3","key":"13_CR55","doi-asserted-by":"publisher","first-page":"850","DOI":"10.1044\/1092-4388(2012\/12-0049)","volume":"56","author":"N Esteve-Gibert","year":"2013","unstructured":"Esteve-Gibert, N., Prieto, P.: Prosodic structure shapes the temporal realization of intonation and manual gesture movements. J. Speech Lang. Hear. Res. 56(3), 850\u2013864 (2013)","journal-title":"J. Speech Lang. Hear. Res."},{"key":"13_CR56","doi-asserted-by":"crossref","unstructured":"Esteve-Gibert, N., Guella\u00ef, B.: Prosody in the auditory and visual domains: a developmental perspective. Front. Psychol. 9 (2018). https:\/\/www.frontiersin.org\/articles\/10.3389\/fpsyg.2018.00338","DOI":"10.3389\/fpsyg.2018.00338"},{"issue":"1","key":"13_CR57","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11571-012-9219-8","volume":"7","author":"J Feldman","year":"2013","unstructured":"Feldman, J.: The neural binding problem(s). Cogn. Neurodyn. 7(1), 1\u201311 (2013). https:\/\/doi.org\/10.1007\/s11571-012-9219-8","journal-title":"Cogn. Neurodyn."},{"key":"13_CR58","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1007\/11783183_14","volume-title":"Theory and Application of Diagrams","author":"A Fish","year":"2006","unstructured":"Fish, A., Stapleton, G.: Defining euler diagrams: simple or what? In: Barker-Plummer, D., Cox, R., Swoboda, N. (eds.) Theory and Application of Diagrams, pp. 109\u2013111. Springer, Heidelberg (2006). 
https:\/\/doi.org\/10.1007\/11783183_14"},{"issue":"2","key":"13_CR59","first-page":"58","volume":"1","author":"G Frege","year":"1918","unstructured":"Frege, G.: Der Gedanke. Beitr\u00e4ge zur Philosophie des deutschen Idealismus 1(2), 58\u201377 (1918)","journal-title":"Beitr\u00e4ge zur Philosophie des deutschen Idealismus"},{"key":"13_CR60","unstructured":"Frieder, S., et al.: Mathematical capabilities of chatgpt. arXiv preprint arXiv:2301.13867 (2023)"},{"key":"13_CR61","unstructured":"Fuchs, S., et al.: Exploring the sound structure of novel vocalizations. In: Proceedings of EVOLANG 2024, Madison, Wisconsin, USA (2024)"},{"key":"13_CR62","doi-asserted-by":"crossref","unstructured":"Galaz\u00a0Garc\u00eda, C., et\u00a0al.: The future of ecosystem assessments is automation, collaboration, and artificial intelligence. Environ. Res. Lett. 18 (2023)","DOI":"10.1088\/1748-9326\/acab19"},{"key":"13_CR63","doi-asserted-by":"publisher","unstructured":"Gardner, R.: When Listeners Talk: Response tokens and listener stance, Pragmatics & Beyond New Series, vol.\u00a092. John Benjamins Publishing Company, Amsterdam (2001). https:\/\/doi.org\/10.1075\/pbns.92. http:\/\/www.jbe-platform.com\/content\/books\/9789027297426","DOI":"10.1075\/pbns.92"},{"key":"13_CR64","doi-asserted-by":"publisher","unstructured":"Giaquinto, M.: Crossing curves: a limit to the use of diagrams in proofs$$\\dagger $$. Philosophia Math. 19(3), 281\u2013307 (2011). https:\/\/doi.org\/10.1093\/philmat\/nkr023","DOI":"10.1093\/philmat\/nkr023"},{"key":"13_CR65","doi-asserted-by":"publisher","unstructured":"Giardino, V.: Diagrammatic proofs in mathematics: (almost) 20 years of research. In: Sriraman, B. (ed.) Handbook of the History and Philosophy of Mathematical Practice, pp. 1\u201323. Springer, Cham (2020). 
https:\/\/doi.org\/10.1007\/978-3-030-19071-2_46-1","DOI":"10.1007\/978-3-030-19071-2_46-1"},{"key":"13_CR66","volume-title":"Constraint-Based Syntax and Semantics: Papers in Honor of Dani\u00e8le Godard","author":"J Ginzburg","year":"2020","unstructured":"Ginzburg, J., Cooper, R., Hough, J., Schlangen, D.: Incrementality and HPSG: why not? In: Abeill\u00e9, A., Bonami, O. (eds.) Constraint-Based Syntax and Semantics: Papers in Honor of Dani\u00e8le Godard. CSLI Publications, Stanford (2020)"},{"key":"13_CR67","doi-asserted-by":"publisher","unstructured":"Ginzburg, J., L\u00fccking, A.: I thought pointing is rude: a dialogue-semantic analysis of pointing at the addressee. In: Grosz, P., Mart\u00ed, L., Pearson, H., Sudo, Y., Zobel, S. (eds.) Proceedings of Sinn und Bedeutung 25, pp. 276\u2013291. SuB 25 (2021). https:\/\/doi.org\/10.18148\/sub\/2021.v25i0.937. https:\/\/ojs.ub.uni-konstanz.de\/sub\/index.php\/sub\/article\/view\/937","DOI":"10.18148\/sub\/2021.v25i0.937"},{"issue":"1","key":"13_CR68","doi-asserted-by":"publisher","first-page":"104","DOI":"10.5334\/gjgl.1152","volume":"5","author":"J Ginzburg","year":"2020","unstructured":"Ginzburg, J., Mazzocconi, C., Tian, Y.: Laughter as language. Glossa 5(1), 104 (2020). https:\/\/doi.org\/10.5334\/gjgl.1152","journal-title":"Glossa"},{"key":"13_CR69","doi-asserted-by":"publisher","unstructured":"Goldstein, L.: Teaching syllogistic to the blind. In: Gorayska, B., Mey, J.L. (eds.) Advances in Psychology, Cognitive Technology, vol.\u00a0113, pp. 243\u2013255. North-Holland (1996). https:\/\/doi.org\/10.1016\/S0166-4115(96)80035-5. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0166411596800355","DOI":"10.1016\/S0166-4115(96)80035-5"},{"key":"13_CR70","doi-asserted-by":"publisher","unstructured":"Gonitsioti, H., Christidou, V., Hatzinikita, V.: Enhancing scientific visual literacy in kindergarten: young children \u2018read\u2019 and produce representations of classification. Int. J. Sci. Math. Technol. 
Learn. 20(1), 1\u201315 (2013). https:\/\/doi.org\/10.18848\/2327-7971\/CGP\/v20i01\/48996. https:\/\/cgscholar.com\/bookstore\/works\/enhancing-scientific-visual-literacy-in-kindergarten","DOI":"10.18848\/2327-7971\/CGP\/v20i01\/48996"},{"key":"13_CR71","doi-asserted-by":"publisher","first-page":"402","DOI":"10.1007\/978-3-031-35748-0_30","volume-title":"Digital Human Modeling and Applications in Health, Safety, Ergonomics and Risk Management","author":"A Gregori","year":"2023","unstructured":"Gregori, A., et al.: A roadmap for technological innovation in multimodal communication research. In: Duffy, V.G. (ed.) Digital Human Modeling and Applications in Health, Safety, Ergonomics and Risk Management, pp. 402\u2013438. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-35748-0_30"},{"issue":"1","key":"13_CR72","doi-asserted-by":"publisher","first-page":"199","DOI":"10.5087\/dad.2011.109","volume":"2","author":"E Gregoromichelaki","year":"2011","unstructured":"Gregoromichelaki, E., et al.: Incrementality and intention-recognition in utterance processing. Dialogue Discourse 2(1), 199\u2013233 (2011). https:\/\/doi.org\/10.5087\/dad.2011.109","journal-title":"Dialogue Discourse"},{"key":"13_CR73","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1093\/acprof:oso\/9780199249633.003.0003","volume-title":"Prosodic Typology: The Phonology of Intonation and Phrasing","author":"M Grice","year":"2005","unstructured":"Grice, M., Baumann, S., Benzm\u00fcller, R.: German intonation in autosegmental-metrical phonology. In: Jun, S.A. (ed.) Prosodic Typology: The Phonology of Intonation and Phrasing, pp. 55\u201383. Oxford University Press, Oxford (2005)"},{"key":"13_CR74","doi-asserted-by":"crossref","unstructured":"Grice, M., Reyelt, M., Benzm\u00fcller, R., Mayer, J., Batliner, A.: Consistency in transcription and labelling of German intonation with GToBI. In: Proceedings of the Fourth International Conference on Spoken Language Processing, vol.\u00a03, pp. 
1716\u20131719. Philadelphia and USA (1996)","DOI":"10.21437\/ICSLP.1996-436"},{"issue":"1481","key":"13_CR75","doi-asserted-by":"publisher","first-page":"801","DOI":"10.1098\/rstb.2007.2089","volume":"362","author":"P Hagoort","year":"2007","unstructured":"Hagoort, P., van Berkum, J.: Beyond the sentence given. Philos. Trans. R. Soc. B Biol. Sci. 362(1481), 801\u2013811 (2007). https:\/\/doi.org\/10.1098\/rstb.2007.2089","journal-title":"Philos. Trans. R. Soc. B Biol. Sci."},{"key":"13_CR76","doi-asserted-by":"publisher","unstructured":"Hahn, H.: The crisis in intuition. In: Hahn, H., McGuinness, B. (eds.) Empiricism, Logic and Mathematics: Philosophical Papers. Vienna Circle Collection, pp. 73\u2013102. Springer, Dordrecht (1980). https:\/\/doi.org\/10.1007\/978-94-009-8982-5_7","DOI":"10.1007\/978-94-009-8982-5_7"},{"issue":"2","key":"13_CR77","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1109\/MIS.2009.36","volume":"24","author":"A Halevy","year":"2009","unstructured":"Halevy, A., Norvig, P., Pereira, F.: The unreasonable effectiveness of data. IEEE Intell. Syst. 24(2), 8\u201312 (2009)","journal-title":"IEEE Intell. Syst."},{"key":"13_CR78","volume-title":"Logic and Visual Information","author":"EM Hammer","year":"1995","unstructured":"Hammer, E.M.: Logic and Visual Information. CSLI Publications, Stanford (1995)"},{"issue":"2","key":"13_CR79","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1080\/24751448.2021.1967058","volume":"5","author":"E Han","year":"2021","unstructured":"Han, E.: Integrating mobile eye-tracking and VSLAM for recording spatial gaze in works of art and architecture. Technol. Archit. Des. 5(2), 177\u2013187 (2021). https:\/\/doi.org\/10.1080\/24751448.2021.1967058","journal-title":"Technol. Archit. Des."},{"key":"13_CR80","unstructured":"Han, K., Xiao, A., Wu, E., Guo, J., Xu, C., Wang, Y.: Transformer in transformer. In: Ranzato, M., Beygelzimer, A., Dauphin, Y., Liang, P., Vaughan, J.W. (eds.) 
Advances in Neural Information Processing Systems, vol.\u00a034, pp. 15908\u201315919. Curran Associates, Inc. (2021). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/854d9fca60b4bd07f9bb215d59ef5561-Paper.pdf"},{"issue":"1\u20133","key":"13_CR81","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1016\/0167-2789(90)90087-6","volume":"42","author":"S Harnad","year":"1990","unstructured":"Harnad, S.: The symbol grounding problem. Physica D 42(1\u20133), 335\u2013346 (1990). https:\/\/doi.org\/10.1016\/0167-2789(90)90087-6","journal-title":"Physica D"},{"key":"13_CR82","doi-asserted-by":"publisher","unstructured":"Harnad, S.: Minds, machines and Turing. In: Moor, J.H. (ed.) The Turing Test: The Elusive Standard of Artificial Intelligence, pp. 253\u2013273. Springer, Dordrecht (2003). https:\/\/doi.org\/10.1007\/978-94-010-0105-2_14","DOI":"10.1007\/978-94-010-0105-2_14"},{"key":"13_CR83","doi-asserted-by":"crossref","unstructured":"Harnad, S.: Language writ large: LLMS, chatgpt, grounding, meaning and understanding. arXiv arXiv:2402.02243 (2024)","DOI":"10.31234\/osf.io\/ch2wx"},{"key":"13_CR84","doi-asserted-by":"publisher","unstructured":"Hartz, A., Guth, B., Jording, M., Vogeley, K., Schulte-R\u00fcther, M.: Temporal behavioral parameters of on-going gaze encounters in a virtual environment. Front. Psychol. 12, 673982 (2021). https:\/\/doi.org\/10.3389\/fpsyg.2021.673982","DOI":"10.3389\/fpsyg.2021.673982"},{"issue":"2","key":"13_CR85","doi-asserted-by":"publisher","first-page":"62","DOI":"10.3390\/bdcc7020062","volume":"7","author":"H Hassani","year":"2023","unstructured":"Hassani, H., Silva, E.S.: The role of chatgpt in data science: how AI-assisted conversational interfaces are revolutionizing the field. Big Data Cogn. Comput. 7(2), 62 (2023)","journal-title":"Big Data Cogn. 
Comput."},{"key":"13_CR86","doi-asserted-by":"publisher","first-page":"641","DOI":"10.3758\/s13423-020-01823-7","volume":"28","author":"O Herbort","year":"2021","unstructured":"Herbort, O., Krause, L.M., Kunde, W.: Perspective determines the production and interpretation of pointing gestures. Psychon. Bull. Rev. 28, 641\u2013648 (2021). https:\/\/doi.org\/10.3758\/s13423-020-01823-7","journal-title":"Psychon. Bull. Rev."},{"key":"13_CR87","doi-asserted-by":"publisher","unstructured":"Herbort, O., Kunde, W.: Spatial (mis-)interpretation of pointing gestures to distal referents. J. Exp. Psychol. Hum. Percept. Perform. (2015). https:\/\/doi.org\/10.1037\/xhp0000126","DOI":"10.1037\/xhp0000126"},{"key":"13_CR88","unstructured":"Hoffmann, J., et\u00a0al.: Training compute-optimal large language models. arXiv preprint arXiv:2203.15556 (2022)"},{"key":"13_CR89","doi-asserted-by":"crossref","unstructured":"Hohwy, J.: The predictive processing hypothesis. In: The Oxford Handbook of 4E Cognition, pp. 129\u2013145 (2018)","DOI":"10.1093\/oxfordhb\/9780198735410.013.7"},{"key":"13_CR90","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1007\/978-3-642-12553-9_2","volume-title":"GW 2009","author":"J Holler","year":"2010","unstructured":"Holler, J.: Speakers\u2019 use of interactive gestures as markers of common ground. In: Kopp, S., Wachsmuth, I. (eds.) GW 2009. LNCS, vol. 5934, pp. 11\u201322. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-12553-9_2"},{"issue":"8","key":"13_CR91","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1016\/j.tics.2019.05.006","volume":"23","author":"J Holler","year":"2019","unstructured":"Holler, J., Levinson, S.C.: Multimodal language processing in human communication. Trends Cogn. Sci. 23(8), 639\u2013652 (2019). https:\/\/doi.org\/10.1016\/j.tics.2019.05.006","journal-title":"Trends Cogn. 
Sci."},{"issue":"3","key":"13_CR92","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1006\/jvlc.2000.0210","volume":"12","author":"J Howse","year":"2001","unstructured":"Howse, J., Molina, F., Taylor, J., Kent, S., Gil, J.: Spider diagrams: a diagrammatic reasoning system. J. Vis. Lang. Comput. 12(3), 299\u2013324 (2001)","journal-title":"J. Vis. Lang. Comput."},{"key":"13_CR93","doi-asserted-by":"crossref","unstructured":"Huang, B., Shu, Y., Zhang, T., Wang, Y.: Dynamic multi-person mesh recovery from uncalibrated multi-view cameras. In: 3DV (2021)","DOI":"10.1109\/3DV53792.2021.00080"},{"issue":"4","key":"13_CR94","first-page":"1148","volume":"13","author":"J Huang","year":"2023","unstructured":"Huang, J., Tan, M.: The role of chatgpt in scientific communication: writing better scientific review articles. Am. J. Cancer Res. 13(4), 1148 (2023)","journal-title":"Am. J. Cancer Res."},{"issue":"3","key":"13_CR95","doi-asserted-by":"publisher","first-page":"673","DOI":"10.1007\/s10579-022-09586-4","volume":"56","author":"N Ienaga","year":"2022","unstructured":"Ienaga, N., Cravotta, A., Terayama, K., Scotney, B.W., Saito, H., Busa, M.G.: Semi-automation of gesture annotation by machine learning and human collaboration. Lang. Resour. Eval. 56(3), 673\u2013700 (2022). https:\/\/doi.org\/10.1007\/s10579-022-09586-4","journal-title":"Lang. Resour. Eval."},{"issue":"18","key":"13_CR96","doi-asserted-by":"publisher","first-page":"1543","DOI":"10.1016\/j.cub.2009.07.051","volume":"19","author":"RE Jack","year":"2009","unstructured":"Jack, R.E., Blais, C., Scheepers, C., Schyns, P.G., Caldara, R.: Cultural confusions show that facial expressions are not universal. Curr. Biol. 19(18), 1543\u20131548 (2009)","journal-title":"Curr. Biol."},{"key":"13_CR97","doi-asserted-by":"publisher","DOI":"10.1093\/acprof:oso\/9780198270126.001.0001","volume-title":"Foundations of Language","author":"R Jackendoff","year":"2002","unstructured":"Jackendoff, R.: Foundations of Language. 
Oxford University Press, Oxford (2002)"},{"key":"13_CR98","unstructured":"Jamnik, M.: Mathematical Reasoning with Diagrams. Lecture Notes, Center for the Study of Language and Information (2001). https:\/\/press.uchicago.edu\/ucp\/books\/book\/distributed\/M\/bo3614100.html"},{"key":"13_CR99","unstructured":"Jang, J., Ye, S., Seo, M.: Can large language models truly understand prompts? A case study with negated prompts. In: Transfer Learning for Natural Language Processing Workshop, pp. 52\u201362. PMLR (2023)"},{"key":"13_CR100","doi-asserted-by":"publisher","unstructured":"Johansen, M.W.: What\u2019s in a diagram? On the classification of symbols, figures and diagrams. In: Magnani, L. (eds.) Model-Based Reasoning in Science and Technology: Theoretical and Cognitive Issues, pp. 89\u2013108. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-37428-9_6","DOI":"10.1007\/978-3-642-37428-9_6"},{"issue":"3","key":"13_CR101","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1007\/s11229-022-03741-8","volume":"200","author":"MW Johansen","year":"2022","unstructured":"Johansen, M.W., Pallavicini, J.L.: Entering the valley of formalism: trends and changes in mathematicians\u2019 publication practice-1885 to 2015. Synthese 200(3), 239 (2022)","journal-title":"Synthese"},{"key":"13_CR102","doi-asserted-by":"publisher","unstructured":"Kadav\u00e1, \u0160., \u0106wiek, A., Stoltmann, K., Fuchs, S., Pouw, W.: Is gesture-speech physics at work in rhythmic pointing? Evidence from Polish counting-out rhymes. In: Proceedings of the 20th International Congress of Phonetic Sciences, Prague, Czech Republic (2023). https:\/\/doi.org\/10.31219\/osf.io\/67fzc. https:\/\/osf.io\/67fzc","DOI":"10.31219\/osf.io\/67fzc"},{"key":"13_CR103","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511807572","volume-title":"Gesture: Visible Action as Utterance","author":"A Kendon","year":"2004","unstructured":"Kendon, A.: Gesture: Visible Action as Utterance. 
Cambridge University Press, Cambridge (2004)"},{"issue":"10s","key":"13_CR104","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3505244","volume":"54","author":"S Khan","year":"2022","unstructured":"Khan, S., Naseer, M., Hayat, M., Zamir, S.W., Khan, F.S., Shah, M.: Transformers in vision: a survey. ACM Comput. Surv. (CSUR) 54(10s), 1\u201341 (2022)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"13_CR105","doi-asserted-by":"crossref","unstructured":"Kiela, D., Bulat, L., Clark, S.: Grounding semantics in olfactory perception. In: Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pp. 231\u2013236 (2015)","DOI":"10.3115\/v1\/P15-2038"},{"key":"13_CR106","doi-asserted-by":"publisher","first-page":"1003","DOI":"10.1613\/jair.5665","volume":"60","author":"D Kiela","year":"2017","unstructured":"Kiela, D., Clark, S.: Learning neural audio embeddings for grounding semantics in auditory perception. J. Artif. Intell. Res. 60, 1003\u20131030 (2017)","journal-title":"J. Artif. Intell. Res."},{"key":"13_CR107","unstructured":"Kisler, T., Schiel, F., Sloetjes, H.: Signal processing via web services: the use case WebMAUS. In: Digital Humanities Conference 2012, p.\u00a05 (2012)"},{"issue":"7","key":"13_CR108","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1111\/1469-7610.00671","volume":"41","author":"A Klin","year":"2000","unstructured":"Klin, A.: Attributing social meaning to ambiguous visual stimuli in higher-functioning autism and Asperger syndrome: the social attribution task. J. Child Psychol. Psychiatry Allied Discip. 41(7), 831\u2013846 (2000)","journal-title":"J. Child Psychol. Psychiatry Allied Discip."},{"key":"13_CR109","doi-asserted-by":"publisher","unstructured":"Konrad, R., et al.: My DGS - annotated. public corpus of German sign language, 3rd release (2020). 
https:\/\/doi.org\/10.25592\/dgs.corpus-3.0","DOI":"10.25592\/dgs.corpus-3.0"},{"key":"13_CR110","unstructured":"Kosorukoff, A.: Human based genetic algorithm. In: IEEE International Conference on Systems, Man, and Cybernetics, vol.\u00a05, pp. 3464\u20133469 (2001). http:\/\/ieeexplore.ieee.org\/xpls\/abs_all.jsp?arnumber=972056"},{"key":"13_CR111","doi-asserted-by":"publisher","unstructured":"Krahmer, E., Swerts, M.: The effects of visual beats on prosodic prominence: acoustic analyses, auditory perception and visual perception. J. Mem. Lang. 57(3), 396\u2013414 (2007). https:\/\/doi.org\/10.1016\/j.jml.2007.06.005. http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0749596X07000708","DOI":"10.1016\/j.jml.2007.06.005"},{"key":"13_CR112","unstructured":"Kranstedt, A.: Situierte Generierung deiktischer Objektreferenz in der multimodalen Mensch-Maschine-Interaktion. No.\u00a0313 in Diski, Aka, Berlin. Ph.D. thesis, Bielefeld University (2008)"},{"key":"13_CR113","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1515\/9783110197747.155","volume-title":"Situated Communication","author":"A Kranstedt","year":"2006","unstructured":"Kranstedt, A., L\u00fccking, A., Pfeiffer, T., Rieser, H., Wachsmuth, I.: Deictic object reference in task-oriented dialogue. In: Rickheit, G., Wachsmuth, I. (eds.) Situated Communication, pp. 155\u2013207. Mouton de Gruyter, Berlin (2006)"},{"key":"13_CR114","doi-asserted-by":"publisher","unstructured":"Krivokapi\u0107, J.: Gestural coordination at prosodic boundaries and its role for prosodic structure and speech planning processes. Philos. Trans. R. Soc. B Biol. Sci. 369(1658), 20130397 (2014). https:\/\/doi.org\/10.1098\/rstb.2013.0397. https:\/\/royalsocietypublishing.org\/doi\/10.1098\/rstb.2013.0397","DOI":"10.1098\/rstb.2013.0397"},{"key":"13_CR115","doi-asserted-by":"publisher","unstructured":"Kuder, A., Bauer, A.: Polish multimodal conversational data (2023). https:\/\/doi.org\/10.18716\/DCH\/A.00000017. 
https:\/\/dch.phil-fak.uni-koeln.de\/bestaende\/datensicherung\/polish-multimodal-conversational-data","DOI":"10.18716\/DCH\/A.00000017"},{"key":"13_CR116","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511808814","volume-title":"Intonational Phonology","author":"DR Ladd","year":"2008","unstructured":"Ladd, D.R.: Intonational Phonology, 2nd edn. Cambridge University Press, Cambridge (2008)","edition":"2"},{"key":"13_CR117","doi-asserted-by":"publisher","first-page":"15205","DOI":"10.1038\/s41598-018-33543-3","volume":"8","author":"J Lane","year":"2018","unstructured":"Lane, J., et al.: Improving face identity perception in age-related macular degeneration via caricaturing. Sci. Rep. 8, 15205 (2018)","journal-title":"Sci. Rep."},{"issue":"3","key":"13_CR118","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1002\/rrq.326","volume":"56","author":"J Lawson-Adams","year":"2021","unstructured":"Lawson-Adams, J., Dickinson, D.K.: Building lexical representations with nonverbal supports. Read. Res. Q. 56(3), 603\u2013622 (2021)","journal-title":"Read. Res. Q."},{"key":"13_CR119","doi-asserted-by":"crossref","unstructured":"Lee, U., et al.: Few-shot is enough: exploring ChatGPT prompt engineering method for automatic question generation in english education. Educ. Inf. Technol. 1\u201333 (2023)","DOI":"10.1007\/s10639-023-12249-8"},{"key":"13_CR120","doi-asserted-by":"crossref","unstructured":"Levinson, S.C.: Deixis. In: Horn, L.R., Ward, G. (eds.) The Handbook of Pragmatics, chap.\u00a05, pp. 97\u2013121. Blackwell (2008)","DOI":"10.1002\/9780470756959.ch5"},{"key":"13_CR121","doi-asserted-by":"publisher","unstructured":"Levinson, S.C., Torreira, F.: Timing in turn-taking and its implications for processing models of language. Front. Psychol. 6(731) (2015). 
https:\/\/doi.org\/10.3389\/fpsyg.2015.00731","DOI":"10.3389\/fpsyg.2015.00731"},{"key":"13_CR122","doi-asserted-by":"crossref","unstructured":"Li, C., et al.: Multimodal foundation models: from specialists to general-purpose assistants. arXiv preprint arXiv:2309.10020, vol. 1, no. 2, p. 2 (2023)","DOI":"10.1561\/9781638283379"},{"key":"13_CR123","doi-asserted-by":"publisher","unstructured":"Liebal, K., Slocombe, K.E., Waller, B.M.: The language void 10 years on: multimodal primate communication research is still uncommon. Ethol. Ecol. Evol. 1\u201314 (2022). https:\/\/doi.org\/10.1080\/03949370.2021.2015453. https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/03949370.2021.2015453","DOI":"10.1080\/03949370.2021.2015453"},{"key":"13_CR124","doi-asserted-by":"publisher","unstructured":"Liesenfeld, A., Lopez, A., Dingemanse, M.: The timing bottleneck: why timing and overlap are mission-critical for conversational user interfaces, speech recognition and dialogue systems. In: Proceedings of the 24th Meeting of the Special Interest Group on Discourse and Dialogue, Prague, Czechia, pp. 482\u2013495. Association for Computational Linguistics (2023). https:\/\/doi.org\/10.18653\/v1\/2023.sigdial-1.45. https:\/\/aclanthology.org\/2023.sigdial-1.45","DOI":"10.18653\/v1\/2023.sigdial-1.45"},{"key":"13_CR125","doi-asserted-by":"publisher","DOI":"10.1111\/cogs.12867","volume":"44","author":"P Litwin","year":"2020","unstructured":"Litwin, P., Mi\u0142kowski, M.: Unification by fiat: arrested development of predictive processing. Cogn. Sci. 44, e12867 (2020). https:\/\/doi.org\/10.1111\/cogs.12867","journal-title":"Cogn. Sci."},{"key":"13_CR126","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1109\/TMM.2020.3046855","volume":"24","author":"C Liu","year":"2020","unstructured":"Liu, C., Mao, Z., Zhang, T., Liu, A.A., Wang, B., Zhang, Y.: Focus your attention: a focal attention for multimodal learning. IEEE Trans. 
Multimedia 24, 103\u2013115 (2020)","journal-title":"IEEE Trans. Multimedia"},{"key":"13_CR127","unstructured":"Liu, Y., et al.: Jailbreaking chatgpt via prompt engineering: an empirical study. arXiv preprint arXiv:2305.13860 (2023)"},{"key":"13_CR128","doi-asserted-by":"crossref","unstructured":"Liu, Y., et\u00a0al.: Summary of ChatGPT-related research and perspective towards the future of large language models. Meta-Radiol. 100017 (2023)","DOI":"10.1016\/j.metrad.2023.100017"},{"key":"13_CR129","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.Y., Feichtenhofer, C., Darrell, T., Xie, S.: A convnet for the 2020s. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11976\u201311986 (2022)","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"13_CR130","unstructured":"Lord, C., et\u00a0al.: Autism Diagnostic Observation Schedule, 2nd edn (ADOS-2), vol. 284. Western Psychological Corporation, Los Angeles (2012)"},{"key":"13_CR131","doi-asserted-by":"publisher","unstructured":"L\u00fccking, A.: Modeling co-verbal gesture perception in type theory with records. In: Ganzha, M., Maciaszek, L., Paprzycki, M. (eds.) Proceedings of the 2016 Federated Conference on Computer Science and Information Systems. Annals of Computer Science and Information Systems, vol.\u00a08, pp. 383\u2013392. IEEE (2016). https:\/\/doi.org\/10.15439\/2016F83","DOI":"10.15439\/2016F83"},{"key":"13_CR132","doi-asserted-by":"crossref","unstructured":"L\u00fccking, A.: Witness-loaded and witness-free demonstratives. In: Coniglio, M., Murphy, A., Schlachter, E., Veenstra, T. (eds.) Atypical Demonstratives. Syntax, Semantics and Pragmatics, pp. 255\u2013284. 
No.\u00a0568 in Linguistische Arbeiten, De Gruyter, Berlin and Boston (2018)","DOI":"10.1515\/9783110560299-009"},{"key":"13_CR133","doi-asserted-by":"publisher","unstructured":"L\u00fccking, A., Bergmann, K., Hahn, F., Kopp, S., Rieser, H.: The Bielefeld speech and gesture alignment corpus (SaGA). In: Multimodal Corpora: Advances in Capturing, Coding and Analyzing Multimodality, pp. 92\u201398. LREC 2010, 7th International Conference for Language Resources and Evaluation, Malta (2010). https:\/\/doi.org\/10.13140\/2.1.4216.1922","DOI":"10.13140\/2.1.4216.1922"},{"issue":"1","key":"13_CR134","doi-asserted-by":"publisher","first-page":"148","DOI":"10.1017\/langcog.2022.30","volume":"15","author":"A L\u00fccking","year":"2023","unstructured":"L\u00fccking, A., Ginzburg, J.: Leading voices: dialogue semantics, cognitive science, and the polyphonic structure of multimodal interaction. Lang. Cogn. 15(1), 148\u2013172 (2023). https:\/\/doi.org\/10.1017\/langcog.2022.30","journal-title":"Lang. Cogn."},{"key":"13_CR135","unstructured":"L\u00fccking, A., Mehler, A., Menke, P.: Taking fingerprints of speech-and-gesture ensembles: approaching empirical evidence of intrapersonal alignmnent in multimodal communication. In: Proceedings of the 12th Workshop on the Semantics and Pragmatics of Dialogue, LonDial 2008, pp. 157\u2013164. King\u2019s College London (2008)"},{"key":"13_CR136","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.pragma.2014.12.013","volume":"77","author":"A L\u00fccking","year":"2015","unstructured":"L\u00fccking, A., Pfeiffer, T., Rieser, H.: Pointing and reference reconsidered. J. Pragmat. 77, 56\u201379 (2015). https:\/\/doi.org\/10.1016\/j.pragma.2014.12.013","journal-title":"J. 
Pragmat."},{"key":"13_CR137","unstructured":"Lugaresi, C., et al.: Mediapipe: a framework for building perception pipelines (2019)"},{"key":"13_CR138","unstructured":"Maerten, A.S., Soydaner, D.: From paintbrush to pixel: a review of deep neural networks in AI-generated art. arXiv arXiv:2302.10913 (2023)"},{"key":"13_CR139","unstructured":"Marcus, G., Southen, R.: Generative AI has a visual plagiarism problem. Experiments with midjourney and dall-e 3 show a copyright minefield. IEEE Spectrum (2024)"},{"key":"13_CR140","doi-asserted-by":"publisher","unstructured":"Marschik, P.B., et al.: Open video data sharing in developmental science and clinical practice. iScience 26(4), 106348 (2023). https:\/\/doi.org\/10.1016\/j.isci.2023.106348","DOI":"10.1016\/j.isci.2023.106348"},{"key":"13_CR141","volume-title":"Hand and Mind - What Gestures Reveal about Thought","author":"D McNeill","year":"1992","unstructured":"McNeill, D.: Hand and Mind - What Gestures Reveal about Thought. Chicago University Press, Chicago (1992)"},{"key":"13_CR142","first-page":"149","volume-title":"Forschungsinfrastrukturen und digitale Informationssysteme in der germanistischen Sprachwissenschaft","author":"A Mehler","year":"2018","unstructured":"Mehler, A., Hemati, W., Gleim, R., Baumartz, D.: VienNA: Auf dem Weg zu einer Infrastruktur f\u00fcr die verteilte interaktive evolution\u00e4re Verarbeitung nat\u00fcrlicher Sprache. In: Lobin, H., Schneider, R., Witt, A. (eds.) Forschungsinfrastrukturen und digitale Informationssysteme in der germanistischen Sprachwissenschaft, vol. 6, pp. 149\u2013176. De Gruyter, Berlin (2018)"},{"key":"13_CR143","unstructured":"Mehler, A., L\u00fccking, A.: Pathways of alignment between gesture and speech: assessing information transmission in multimodal ensembles. In: Giorgolo, G., Alahverdzhieva, K. (eds.) 
Proceedings of the International Workshop on Formal and Computational Approaches to Multimodal Communication under the auspices of ESSLLI 2012, Opole, Poland, 6\u201310 August (2012)"},{"key":"13_CR144","doi-asserted-by":"publisher","DOI":"10.2196\/52865","volume":"25","author":"B Mesk\u00f3","year":"2023","unstructured":"Mesk\u00f3, B.: The impact of multimodal large language models on health care\u2019s future. J. Med. Internet Res. 25, e52865 (2023)","journal-title":"J. Med. Internet Res."},{"key":"13_CR145","unstructured":"Moktefi, A.: Diagrams as scientific instruments. In: Benedek, A., Veszelszki, A. (eds.), Visual, Virtual, Veridical, Series Visual Learning, vol. 7 (2017). https:\/\/www.academia.edu\/33378854\/Diagrams_as_scientific_instruments"},{"key":"13_CR146","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1016\/j.pragma.2014.04.004","volume":"65","author":"L Mondada","year":"2014","unstructured":"Mondada, L.: The local constitution of multimodal resources for social interaction. J. Pragmat. 65, 137\u2013156 (2014). https:\/\/doi.org\/10.1016\/j.pragma.2014.04.004","journal-title":"J. Pragmat."},{"key":"13_CR147","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1007\/s10648-007-9047-2","volume":"19","author":"R Moreno","year":"2007","unstructured":"Moreno, R., Mayer, R.: Interactive multimodal learning environments: special issue on interactive learning environments: contemporary issues and trends. Educ. Psychol. Rev. 19, 309\u2013326 (2007)","journal-title":"Educ. Psychol. Rev."},{"key":"13_CR148","unstructured":"Morris, M.R., et al.: Levels of AGI: operationalizing progress on the path to AGI. 
arXiv arXiv:2311.02462 (2023)"},{"key":"13_CR149","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1016\/j.inffus.2021.06.007","volume":"76","author":"G Muhammad","year":"2021","unstructured":"Muhammad, G., Alshehri, F., Karray, F., El Saddik, A., Alsulaiman, M., Falk, T.H.: A comprehensive survey on multimodal medical signals fusion for smart healthcare systems. Inf. Fusion 76, 355\u2013375 (2021)","journal-title":"Inf. Fusion"},{"issue":"5","key":"13_CR150","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1111\/j.1467-8721.2007.00518.x","volume":"16","author":"P Mundy","year":"2007","unstructured":"Mundy, P., Newell, L.: Attention, joint attention, and social cognition. Curr. Dir. Psychol. Sci. 16(5), 269\u2013274 (2007). https:\/\/doi.org\/10.1111\/j.1467-8721.2007.00518.x","journal-title":"Curr. Dir. Psychol. Sci."},{"key":"13_CR151","unstructured":"Naert, L., Reverdy, C., Larboulette, C., Gibet, S.: Per channel automatic annotation of sign language motion capture data. In: Proceedings of the LREC2018 8th Workshop on the Representation and Processing of Sign Languages: Involving the Language Community, pp. 139\u2013146. European Language Resources Association (ELRA), Miyazaki, Japan (2018). https:\/\/www.sign-lang.uni-hamburg.de\/lrec\/pub\/18014.pdf"},{"key":"13_CR152","first-page":"14200","volume":"34","author":"A Nagrani","year":"2021","unstructured":"Nagrani, A., Yang, S., Arnab, A., Jansen, A., Schmid, C., Sun, C.: Attention bottlenecks for multimodal fusion. Adv. Neural. Inf. Process. Syst. 34, 14200\u201314213 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR153","doi-asserted-by":"publisher","DOI":"10.1002\/9780470400777","volume-title":"Diagrammatic Reasoning in AI: Decision-Making and Problem-Solving With Diagrams","author":"RT Nakatsu","year":"2009","unstructured":"Nakatsu, R.T.: Diagrammatic Reasoning in AI: Decision-Making and Problem-Solving With Diagrams. 
Wiley, Hoboken (2009)"},{"key":"13_CR154","doi-asserted-by":"publisher","unstructured":"Nilsson, J.F.: A cube of opposition for predicate logic. Logica Universalis 14(1), 103\u2013114 (2020). https:\/\/doi.org\/10.1007\/s11787-020-00244-3","DOI":"10.1007\/s11787-020-00244-3"},{"key":"13_CR155","doi-asserted-by":"publisher","unstructured":"Nota, N., Trujillo, J.P., Holler, J.: Facial signals and social actions in multimodal face-to-face interaction. Brain Sci. 11(8), 1017 (2021). https:\/\/doi.org\/10.3390\/brainsci11081017. https:\/\/www.mdpi.com\/2076-3425\/11\/8\/1017","DOI":"10.3390\/brainsci11081017"},{"key":"13_CR156","doi-asserted-by":"publisher","first-page":"248","DOI":"10.1016\/j.neuroimage.2016.02.026","volume":"130","author":"E Oberwelland","year":"2016","unstructured":"Oberwelland, E., et al.: Look into my eyes: investigating joint attention using interactive eye-tracking and fMRI in a developmental sample. NeuroImage 130, 248\u2013260 (2016). https:\/\/doi.org\/10.1016\/j.neuroimage.2016.02.026","journal-title":"NeuroImage"},{"key":"13_CR157","unstructured":"OpenAI: ChatGPT (Feb 06 version) [large language model] (2023). https:\/\/chat.openai.com\/chat"},{"key":"13_CR158","unstructured":"OpenAI: GPT-4 technical report. arXiv arXiv:2303.08774 (2023)"},{"key":"13_CR159","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1007\/978-1-4612-2258-3_19","volume-title":"Computing Prosody","author":"M Ostendorf","year":"1997","unstructured":"Ostendorf, M., Ross, K.: A multi-level model for recognition of intonation labels. In: Sagisaka, Y., Campbell, N., Higuchi, N. (eds.) Computing Prosody, pp. 291\u2013308. Springer, New York (1997). https:\/\/doi.org\/10.1007\/978-1-4612-2258-3_19"},{"key":"13_CR160","doi-asserted-by":"publisher","unstructured":"Paggio, P., Jongejan, B., Agirrezabal, M., Navarretta, C.: Detecting head movements in video-recorded dyadic conversations. 
In: Proceedings of the 20th International Conference on Multimodal Interaction: Adjunct. ICMI 2018. Association for Computing Machinery (2018). https:\/\/doi.org\/10.1145\/3281151.3281152","DOI":"10.1145\/3281151.3281152"},{"key":"13_CR161","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1038\/s41593-021-00997-0","volume":"25","author":"AC Paulk","year":"2022","unstructured":"Paulk, A.C., et al.: Large-scale neural recordings with single neuron resolution using neuropixels probes in human cortex. Nat. Neurosci. 25, 252\u2013263 (2022). https:\/\/doi.org\/10.1038\/s41593-021-00997-0","journal-title":"Nat. Neurosci."},{"issue":"6060","key":"13_CR162","doi-asserted-by":"publisher","first-page":"1226","DOI":"10.1126\/science.1213847","volume":"334","author":"RD Peng","year":"2011","unstructured":"Peng, R.D.: Reproducible research in computational science. Science 334(6060), 1226\u20131227 (2011)","journal-title":"Science"},{"issue":"3","key":"13_CR163","doi-asserted-by":"publisher","first-page":"376","DOI":"10.1075\/is.18.3.05per","volume":"18","author":"M Perlman","year":"2017","unstructured":"Perlman, M.: Debunking two myths against vocal origins of language. Interact. Stud. 18(3), 376\u2013401 (2017). https:\/\/doi.org\/10.1075\/is.18.3.05per","journal-title":"Interact. Stud."},{"key":"13_CR164","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.3389\/fpsyg.2018.01109","volume":"9","author":"P Perniss","year":"2018","unstructured":"Perniss, P.: Why we should study multimodal language. Front. Psychol. 9, 1109 (2018). https:\/\/doi.org\/10.3389\/fpsyg.2018.01109","journal-title":"Front. Psychol."},{"key":"13_CR165","doi-asserted-by":"publisher","unstructured":"Poustka, L., Schulte-R\u00fcther, M.: Autismus-Spektrum-St\u00f6rungen bei Kindern und Jugendlichen. In: Fegert, J., et al. (eds.) Psychiatrie und Psychotherapie des Kindes- und Jugendalters, pp. 1\u201323. Springer, Heidelberg (2022). 
https:\/\/doi.org\/10.1007\/978-3-662-49289-5_123-1","DOI":"10.1007\/978-3-662-49289-5_123-1"},{"key":"13_CR166","doi-asserted-by":"publisher","unstructured":"Pouw, W., Dixon, J.A.: Entrainment and modulation of gesture-speech synchrony under delayed auditory feedback. Cogn. Sci. 43(3), e12721 (2019). https:\/\/doi.org\/10.1111\/cogs.12721. https:\/\/onlinelibrary.wiley.com\/doi\/abs\/10.1111\/cogs.12721","DOI":"10.1111\/cogs.12721"},{"key":"13_CR167","doi-asserted-by":"publisher","unstructured":"Pouw, W., Fuchs, S.: Origins of vocal-entangled gesture. Neurosci. Biobehav. Rev. 141, 104836 (2022). https:\/\/doi.org\/10.1016\/j.neubiorev.2022.104836. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0149763422003256","DOI":"10.1016\/j.neubiorev.2022.104836"},{"key":"13_CR168","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"13_CR169","doi-asserted-by":"crossref","unstructured":"Ray, P.P.: Chatgpt: a comprehensive review on background, applications, key challenges, bias, ethics, limitations and future scope. Internet Things Cyber-Phys. Syst. (2023)","DOI":"10.1016\/j.iotcps.2023.04.003"},{"key":"13_CR170","unstructured":"Reichenberger, A., Lemanski, J., Bhattacharjee, R.: The role of gestures in logic. Vis. Commun. (upcoming)"},{"key":"13_CR171","doi-asserted-by":"publisher","unstructured":"Ripperda, J., Drijvers, L., Holler, J.: Speeding up the detection of non-iconic and iconic gestures (spudnig): a toolkit for the automatic detection of hand movements and gestures in video data. Behav. Res. Methods 52(4), 1783\u20131794 (2020). https:\/\/doi.org\/10.3758\/s13428-020-01350-2","DOI":"10.3758\/s13428-020-01350-2"},{"key":"13_CR172","unstructured":"Rohrer, P.L.: A temporal and pragmatic analysis of gesture-speech association. 
A corpus-based approach using the novel MultiModal MultiDimensional (M3D) labeling system. Ph.D. thesis, Nantes Universit\u00e9 (2022)"},{"key":"13_CR173","unstructured":"Rohrer, P.L., et al.: The MultiModal MultiDimensional (M3D) labeling system (2023). https:\/\/doi.org\/10.17605\/osf.io\/ankdx"},{"key":"13_CR174","unstructured":"Rosenberg, A.: Classification of prosodic events using quantized contour modeling. In: Proceedings of HLT-NAACL, pp. 721\u2013724 (2010)"},{"key":"13_CR175","doi-asserted-by":"publisher","unstructured":"Rosenberg, A., Hasegawa-Johnson, M.: Automatic prosody labelling and assessment. In: Gussenhoven, C., Chen, A. (eds.) The Oxford Handbook of Language Prosody, pp. 645\u2013656. Oxford University Press, Oxford (2020). https:\/\/doi.org\/10.1093\/oxfordhb\/9780198832232.013.43","DOI":"10.1093\/oxfordhb\/9780198832232.013.43"},{"issue":"5","key":"13_CR176","doi-asserted-by":"publisher","first-page":"1395","DOI":"10.1016\/j.ridd.2012.03.007","volume":"33","author":"AL Rumpf","year":"2012","unstructured":"Rumpf, A.L., Kamp-Becker, I., Becker, K., Kauschke, C.: Narrative competence and internal state language of children with asperger syndrome and ADHD. Res. Dev. Disabil. 33(5), 1395\u20131407 (2012). https:\/\/doi.org\/10.1016\/j.ridd.2012.03.007","journal-title":"Res. Dev. Disabil."},{"issue":"4","key":"13_CR177","doi-asserted-by":"publisher","first-page":"696","DOI":"10.2307\/412243","volume":"50","author":"H Sacks","year":"1974","unstructured":"Sacks, H., Schegloff, E.A., Jefferson, G.: A simplest systematics for the organization of turn-taking for conversation. Language 50(4), 696 (1974). https:\/\/doi.org\/10.2307\/412243","journal-title":"Language"},{"key":"13_CR178","unstructured":"Sadasivan, V.S., Kumar, A., Balasubramanian, S., Wang, W., Feizi, S.: Can AI-generated text be reliably detected? (2023)"},{"key":"13_CR179","unstructured":"Sadler, M., Regan, N.: Game Changer. 
New in Chess (2019)"},{"key":"13_CR180","doi-asserted-by":"crossref","unstructured":"Sallam, M.: Chatgpt utility in healthcare education, research, and practice: systematic review on the promising perspectives and valid concerns. In: Healthcare, vol.\u00a011, p.\u00a0887. MDPI (2023)","DOI":"10.3390\/healthcare11060887"},{"issue":"1","key":"13_CR181","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s13054-022-04291-8","volume":"27","author":"M Salvagno","year":"2023","unstructured":"Salvagno, M., Taccone, F.S., Gerli, A.G., et al.: Can artificial intelligence help for scientific writing? Crit. Care 27(1), 1\u20135 (2023)","journal-title":"Crit. Care"},{"issue":"3","key":"13_CR182","first-page":"18","volume":"7","author":"M Sankey","year":"2011","unstructured":"Sankey, M., Birch, D., Gardiner, M.: The impact of multiple representations of content using multimedia on learning outcomes across learning styles and modal preferences. Int. J. Educ. Dev. ICT 7(3), 18\u201335 (2011)","journal-title":"Int. J. Educ. Dev. ICT"},{"key":"13_CR183","unstructured":"Saravia, E.: Prompt Engineering Guide (2022). https:\/\/github.com\/dair-ai\/Prompt-Engineering-Guide"},{"key":"13_CR184","doi-asserted-by":"crossref","unstructured":"Schepens, J., Marx, N., Gagl, B.: Can we utilize large language models (LLMS) to generate useful linguistic corpora? A case study of the word frequency effect in young German readers (2023)","DOI":"10.31234\/osf.io\/gm9b6"},{"issue":"1","key":"13_CR185","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1111\/jcpp.13650","volume":"64","author":"M Schulte-R\u00fcther","year":"2023","unstructured":"Schulte-R\u00fcther, M., et al.: Using machine learning to improve diagnostic assessment of ASD in the light of specific differential and co-occurring diagnoses. J. Child Psychol. Psychiatry 64(1), 16\u201326 (2023). https:\/\/doi.org\/10.1111\/jcpp.13650","journal-title":"J. Child Psychol. 
Psychiatry"},{"issue":"2","key":"13_CR186","doi-asserted-by":"publisher","first-page":"298","DOI":"10.1002\/aur.1654","volume":"10","author":"M Schulte-R\u00fcther","year":"2017","unstructured":"Schulte-R\u00fcther, M., et al.: Intact mirror mechanisms for automatic facial emotions in children and adolescents with autism spectrum disorder. Autism Res. 10(2), 298\u2013310 (2017). https:\/\/doi.org\/10.1002\/aur.1654","journal-title":"Autism Res."},{"key":"13_CR187","unstructured":"Schweitzer, A.: Production and perception of prosodic events-evidence from corpus-based experiments. Ph.D. thesis, Universit\u00e4t Stuttgart, Stuttgart (2010). http:\/\/elib.uni-stuttgart.de\/opus\/volltexte\/2011\/6031\/pdf\/Dissertation_Schweitzer.pdf"},{"key":"13_CR188","doi-asserted-by":"publisher","unstructured":"Schweitzer, A., M\u00f6bius, B.: Experiments on automatic prosodic labeling. In: Proceedings of the 10th International Conference on Speech Communication and Technology, pp. 2515\u20132518. Brighton (2009). https:\/\/doi.org\/10.21437\/Interspeech.2009-663","DOI":"10.21437\/Interspeech.2009-663"},{"key":"13_CR189","doi-asserted-by":"crossref","unstructured":"Shimojima, A.: Operational constraints in diagrammatic reasoning. In: Allwein, G., Barwise, J. (eds.) Logical Reasoning with Diagrams. Oxford University Press, Oxford (1996)","DOI":"10.1093\/oso\/9780195104271.003.0006"},{"key":"13_CR190","volume-title":"Semantic Properties of Diagrams and their Cognitive Potentials","author":"A Shimojima","year":"2015","unstructured":"Shimojima, A.: Semantic Properties of Diagrams and their Cognitive Potentials. CSLI Publications, Stanford (2015)"},{"key":"13_CR191","doi-asserted-by":"publisher","unstructured":"Shin, S.J.: The logical status of diagrams. Cambridge University Press, Cambridge (1995). https:\/\/doi.org\/10.1017\/CBO9780511574696. 
https:\/\/www.cambridge.org\/core\/books\/logical-status-of-diagrams\/27130C396E0899C90BC632B4C7617E2B","DOI":"10.1017\/CBO9780511574696"},{"key":"13_CR192","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550, 354\u2013359 (2017). https:\/\/doi.org\/10.1038\/nature24270","journal-title":"Nature"},{"key":"13_CR193","unstructured":"van\u00a0der Sluis, I.: Multimodal Reference. Studies in Automatic Generation of Multimodal Referring Expressions. Uitgevershuis BuG, Groningen, NL. Ph.D thesis, Univ. van Tilburg (2005)"},{"issue":"2","key":"13_CR194","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1007\/s10849-017-9250-6","volume":"26","author":"G Stapleton","year":"2017","unstructured":"Stapleton, G., Jamnik, M., Shimojima, A.: What makes an effective representation of information: a formal account of observational advantages. J. Logic Lang. Inform. 26(2), 143\u2013177 (2017). https:\/\/doi.org\/10.1007\/s10849-017-9250-6","journal-title":"J. Logic Lang. Inform."},{"issue":"Suppl 1","key":"13_CR195","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1007\/s00106-023-01310-0","volume":"71","author":"T St\u00f6ver","year":"2023","unstructured":"St\u00f6ver, T., et al.: Structure and establishment of the German Cochlear Implant Registry (DCIR). HNO 71(Suppl 1), 82\u201392 (2023)","journal-title":"HNO"},{"key":"13_CR196","doi-asserted-by":"crossref","unstructured":"Sun, C., Shrivastava, A., Singh, S., Gupta, A.: Revisiting unreasonable effectiveness of data in deep learning era. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 843\u2013852 (2017)","DOI":"10.1109\/ICCV.2017.97"},{"key":"13_CR197","doi-asserted-by":"crossref","unstructured":"Syrdal, A.K., McGory, J.: Inter-transcriber reliability of ToBI prosodic labeling. 
In: 6th International Conference on Spoken Language Processing (ICSLP 2000), vol. 3, pp. 235\u2013238 (2000). https:\/\/www.isca-speech.org\/archive\/icslp_2000\/i00_3235.html","DOI":"10.21437\/ICSLP.2000-521"},{"key":"13_CR198","unstructured":"Taori, R., et al.: Alpaca: a strong, replicable instruction-following model. Stanford Center for Research on Foundation Models, vol. 3, no. 6, p. 7 (2023). https:\/\/crfm.stanford.edu\/2023\/03\/13\/alpaca.html"},{"key":"13_CR199","unstructured":"Team, G., et\u00a0al.: Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)"},{"key":"13_CR200","unstructured":"T\u00f6rnberg, P.: ChatGPT-4 outperforms experts and crowd workers in annotating political twitter messages with zero-shot learning. arXiv preprint arXiv:2304.06588 (2023)"},{"key":"13_CR201","doi-asserted-by":"publisher","unstructured":"Torres, M.J.R., Barwaldt, R.: Approaches for diagrams accessibility for blind people: a systematic review. In: 2019 IEEE Frontiers in Education Conference (FIE), pp.\u00a01\u20137 (2019). https:\/\/doi.org\/10.1109\/FIE43999.2019.9028522. https:\/\/ieeexplore.ieee.org\/document\/9028522. ISSN 2377-634X","DOI":"10.1109\/FIE43999.2019.9028522"},{"key":"13_CR202","unstructured":"Touvron, H., et al.: Llama 2: open foundation and fine-tuned chat models. arXiv arXiv:2307.09288 (2023)"},{"issue":"5","key":"13_CR203","doi-asserted-by":"publisher","first-page":"1136","DOI":"10.1177\/17456916221141422","volume":"18","author":"JP Trujillo","year":"2023","unstructured":"Trujillo, J.P., Holler, J.: Interactionally embedded gestalt principles of multimodal human communication. Perspect. Psychol. Sci. 18(5), 1136\u20131159 (2023). https:\/\/doi.org\/10.1177\/17456916221141422","journal-title":"Perspect. Psychol. 
Sci."},{"issue":"3","key":"13_CR204","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1075\/gest.12.3.04tut","volume":"12","author":"M Tutton","year":"2012","unstructured":"Tutton, M.: When and why the lexical ground is a gestural figure. Gesture 12(3), 361\u2013386 (2012). https:\/\/doi.org\/10.1075\/gest.12.3.04tut","journal-title":"Gesture"},{"issue":"3","key":"13_CR205","doi-asserted-by":"publisher","first-page":"322","DOI":"10.1016\/j.learninstruc.2007.02.006","volume":"17","author":"Y Uesaka","year":"2007","unstructured":"Uesaka, Y., Manalo, E., Ichikawa, S.: What kinds of perceptions and daily learning behaviors promote students\u2019 use of diagrams in mathematics problem solving? Learn. Instr. 17(3), 322\u2013335 (2007)","journal-title":"Learn. Instr."},{"key":"13_CR206","volume-title":"An Introduction to Cognitive Linguistics","author":"F Ungerer","year":"2006","unstructured":"Ungerer, F., Schmid, H.J.: An Introduction to Cognitive Linguistics, 2nd edn. Pearson, Harlow (2006)","edition":"2"},{"key":"13_CR207","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"13_CR208","doi-asserted-by":"publisher","unstructured":"Wagner, P., Malisz, Z., Kopp, S.: Gesture and speech in interaction: an overview. Speech Commun. 57, 209\u2013232 (2014). https:\/\/doi.org\/10.1016\/j.specom.2013.09.008. http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0167639313001295","DOI":"10.1016\/j.specom.2013.09.008"},{"key":"13_CR209","doi-asserted-by":"publisher","unstructured":"Wagner, P., \u0106wiek, A., Samlowski, B.: Exploiting the speech-gesture link to capture fine-grained prosodic prominence impressions and listening strategies. J. Phonetics 76, 100911 (2019). https:\/\/doi.org\/10.1016\/j.wocn.2019.07.001. 
http:\/\/www.sciencedirect.com\/science\/article\/pii\/S009544701830038X","DOI":"10.1016\/j.wocn.2019.07.001"},{"issue":"2","key":"13_CR210","doi-asserted-by":"publisher","DOI":"10.1002\/mef2.43","volume":"2","author":"DQ Wang","year":"2023","unstructured":"Wang, D.Q., Feng, L.Y., Ye, J.G., Zou, J.G., Zheng, Y.F.: Accelerating the integration of chatgpt and other large-scale AI models into biomedical research and healthcare. MedComm-Future Med. 2(2), e43 (2023)","journal-title":"MedComm-Future Med."},{"key":"13_CR211","doi-asserted-by":"crossref","unstructured":"Watkins, R.: Guidance for researchers and peer-reviewers on the ethical use of large language models (LLMS) in scientific research workflows. AI Ethics 1\u20136 (2023)","DOI":"10.1007\/s43681-023-00294-5"},{"key":"13_CR212","unstructured":"Wei, X., et\u00a0al.: Zero-shot information extraction via chatting with chatgpt. arXiv preprint arXiv:2302.10205 (2023)"},{"key":"13_CR213","unstructured":"Wicke, P.: Probing language models\u2019 gesture understanding for enhanced human-AI interaction. arXiv arXiv:2401.17858 (2024)"},{"issue":"4","key":"13_CR214","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1109\/89.326607","volume":"2","author":"CW Wightman","year":"1994","unstructured":"Wightman, C.W., Ostendorf, M.: Automatic labeling of prosodic patterns. IEEE Trans. Speech Audio Process. 2(4), 469\u2013481 (1994). https:\/\/doi.org\/10.1109\/89.326607","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"13_CR215","doi-asserted-by":"publisher","unstructured":"Winding, M., et al.: The connectome of an insect brain. Science 379(6636), eadd9330 (2023). https:\/\/doi.org\/10.1126\/science.add9330","DOI":"10.1126\/science.add9330"},{"key":"13_CR216","unstructured":"Wittenburg, P., Brugman, H., Russel, A., Klassmann, A., Sloetjes, H.: ELAN: a professional framework for multimodality research. In: Proceedings of the 5th International Conference on Language Resources and Evaluation, LREC 2006, pp. 
1556\u20131559 (2006)"},{"key":"13_CR217","unstructured":"World Health Organization (WHO): International classification of diseases, eleventh revision (ICD-11) (2019\/2021). https:\/\/icd.who.int\/browse11"},{"key":"13_CR218","unstructured":"Wu, S., Fei, H., Qu, L., Ji, W., Chua, T.S.: Next-GPT: any-to-any multimodal LLM. CoRR abs\/2309.05519 (2023)"},{"key":"13_CR219","doi-asserted-by":"publisher","unstructured":"Xu, K., Zhong, G., Deng, Z., Zhang, K., Huang, K.: Self-supervised generative learning for sequential data prediction. Appl. Intell. 53, 20675\u201320689 (2023). https:\/\/doi.org\/10.1007\/s10489-023-04578-5","DOI":"10.1007\/s10489-023-04578-5"},{"key":"13_CR220","unstructured":"Yadlowsky, S., Doshi, L., Tripuraneni, N.: Pretraining data mixtures enable narrow model selection capabilities in transformer models. arXiv preprint arXiv:2311.00871 (2023)"},{"issue":"3","key":"13_CR221","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/3446776","volume":"64","author":"C Zhang","year":"2021","unstructured":"Zhang, C., Bengio, S., Hardt, M., Recht, B., Vinyals, O.: Understanding deep learning (still) requires rethinking generalization. Commun. ACM 64(3), 107\u2013115 (2021)","journal-title":"Commun. ACM"},{"key":"13_CR222","unstructured":"Zhang, Y., et al.: Meta-transformer: a unified framework for multimodal learning. arXiv preprint arXiv:2307.10802 (2023)"},{"key":"13_CR223","unstructured":"Zhou, K., et al.: Don\u2019t make your LLM an evaluation benchmark cheater. 
arXiv preprint arXiv:2311.01964 (2023)"}],"container-title":["Lecture Notes in Computer Science","Digital Human Modeling and Applications in Health, Safety, Ergonomics and Risk Management"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-61066-0_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T04:16:27Z","timestamp":1717215387000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-61066-0_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031610653","9783031610660"],"references-count":223,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-61066-0_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"1 June 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"HCII","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Human-Computer Interaction","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Washington DC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 June 
2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"hcii2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.hci.international\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}