{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T23:28:33Z","timestamp":1743118113559,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030215644"},{"type":"electronic","value":"9783030215651"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-21565-1_8","type":"book-chapter","created":{"date-parts":[[2019,7,9]],"date-time":"2019-07-09T23:04:01Z","timestamp":1562713441000},"page":"108-123","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Exploring Temporal Dependencies in Multimodal Referring Expressions with Mixed Reality"],"prefix":"10.1007","author":[{"given":"Elena","family":"Sibirtseva","sequence":"first","affiliation":[]},{"given":"Ali","family":"Ghadirzadeh","sequence":"additional","affiliation":[]},{"given":"Iolanda","family":"Leite","sequence":"additional","affiliation":[]},{"given":"M\u00e5rten","family":"Bj\u00f6rkman","sequence":"additional","affiliation":[]},{"given":"Danica","family":"Kragic","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,6,8]]},"reference":[{"key":"8_CR1","unstructured":"Admoni, H., Srinivasa, S.: Predicting user intent through eye gaze for shared autonomy. In: Proceedings of the AAAI Fall Symposium Series: Shared Autonomy in Research and Practice (AAAI Fall Symposium), pp. 298\u2013303 (2016)"},{"key":"8_CR2","first-page":"111","volume-title":"The Oxford Handbook of Language and Social Psychology","author":"J Bavelas","year":"2014","unstructured":"Bavelas, J., Gerwing, J., Healing, S.: Hand and facial gestures in conversational interaction. In: Holtgraves, T.M. (ed.) The Oxford Handbook of Language and Social Psychology, pp. 111\u2013130. Oxford University Press, Oxford (2014)"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Bolt, R.A.: \u201cPut-that-there\u201d: voice and gesture at the graphics interface, vol. 14. ACM (1980)","DOI":"10.1145\/965105.807503"},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Chai, J.Y., et al.: Collaborative effort towards common ground in situated human-robot dialogue. In: Proceedings of the 2014 ACM\/IEEE International Conference on Human-Robot Interaction, pp. 33\u201340. ACM (2014)","DOI":"10.1145\/2559636.2559677"},{"key":"8_CR5","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Duarte, N., Tasevski, J., Coco, M., Rakovi\u0107, M., Santos-Victor, J.: Action anticipation: reading the intentions of humans and robots. arXiv preprint arXiv:1802.02788 (2018)","DOI":"10.1109\/LRA.2018.2861569"},{"key":"8_CR7","unstructured":"Funakoshi, K., Nakano, M., Tokunaga, T., Iida, R.: A unified probabilistic approach to referring expressions. In: Proceedings of the 13th Annual Meeting of the Special Interest Group on Discourse and Dialogue, pp. 237\u2013246. Association for Computational Linguistics (2012)"},{"key":"8_CR8","doi-asserted-by":"publisher","first-page":"1073","DOI":"10.1016\/j.procs.2014.08.194","volume":"35","author":"K Harezlak","year":"2014","unstructured":"Harezlak, K., Kasprowski, P., Stasch, M.: Towards accurate eye tracker calibration-methods and procedures. Proc. Comput. Sci. 35, 1073\u20131081 (2014)","journal-title":"Proc. Comput. Sci."},{"key":"8_CR9","first-page":"1049","volume":"6","author":"CM Huang","year":"2015","unstructured":"Huang, C.M., Andrist, S., Saupp\u00e9, A., Mutlu, B.: Using gaze patterns to predict task intent in collaboration. Front. Psychol. 6, 1049 (2015)","journal-title":"Front. Psychol."},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Kontogiorgos, D., Sibirtseva, E., Pereira, A., Skantze, G., Gustafson, J.: Multimodal reference resolution in collaborative assembly tasks. In: Proceedings of the 4th International Workshop on Multimodal Analyses Enabling Artificial Agents in Human-Machine Interaction, pp. 38\u201342. ACM (2018)","DOI":"10.1145\/3279972.3279976"},{"key":"8_CR11","doi-asserted-by":"crossref","unstructured":"Lalanne, D., Nigay, L., Robinson, P., Vanderdonckt, J., Ladry, J.F., et al.: Fusion engines for multimodal input: a survey. In: Proceedings of the 2009 international conference on Multimodal interfaces, pp. 153\u2013160. ACM (2009)","DOI":"10.1145\/1647314.1647343"},{"key":"8_CR12","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1016\/j.patrec.2018.05.023","volume":"118","author":"S Lathuili\u00e8re","year":"2018","unstructured":"Lathuili\u00e8re, S., Mass\u00e9, B., Mesejo, P., Horaud, R.: Neural network based reinforcement learning for audio-visual gaze control in human-robot interaction. Pattern Recogn. Lett. 118, 61\u201371 (2018)","journal-title":"Pattern Recogn. Lett."},{"key":"8_CR13","doi-asserted-by":"crossref","unstructured":"Mehlmann, G., H\u00e4ring, M., Janowski, K., Baur, T., Gebhard, P., Andr\u00e9, E.: Exploring a model of gaze for grounding in multimodal HRI. In: Proceedings of the 16th International Conference on Multimodal Interaction, pp. 247\u2013254. ACM (2014)","DOI":"10.1145\/2663204.2663275"},{"issue":"7540","key":"8_CR14","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Minh","year":"2015","unstructured":"Minh, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529 (2015)","journal-title":"Nature"},{"key":"8_CR15","doi-asserted-by":"crossref","unstructured":"Qureshi, A.H., Nakamura, Y., Yoshikawa, Y., Ishiguro, H.: Robot gains social intelligence through multimodal deep reinforcement learning. In: 2016 IEEE-RAS 16th International Conference on Humanoid Robots (Humanoids), pp. 745\u2013751. IEEE (2016)","DOI":"10.1109\/HUMANOIDS.2016.7803357"},{"issue":"1\u20133","key":"8_CR16","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"Reynolds, D.A., Quatieri, T.F., Dunn, R.B.: Speaker verification using adapted gaussian mixture models. Digit. Sig. Process. 10(1\u20133), 19\u201341 (2000)","journal-title":"Digit. Sig. Process."},{"issue":"9","key":"8_CR17","doi-asserted-by":"publisher","first-page":"1927","DOI":"10.1109\/TCYB.2014.2362101","volume":"45","author":"A Savran","year":"2015","unstructured":"Savran, A., Cao, H., Nenkova, A., Verma, R.: Temporal bayesian fusion for affect sensing: combining video, audio, and lexical modalities. IEEE Trans. Cybern. 45(9), 1927\u20131941 (2015)","journal-title":"IEEE Trans. Cybern."},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Sibirtseva, E., et al.: A comparison of visualisation methods for disambiguating verbal requests in human-robot interaction. arXiv preprint arXiv:1801.08760 (2018)","DOI":"10.1109\/ROMAN.2018.8525554"},{"issue":"3","key":"8_CR19","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1023\/B:STCO.0000035301.49549.88","volume":"14","author":"AJ Smola","year":"2004","unstructured":"Smola, A.J., Sch\u00f6lkopf, B.: A tutorial on support vector regression. Stat. Comput. 14(3), 199\u2013222 (2004)","journal-title":"Stat. Comput."},{"key":"8_CR20","unstructured":"Srivastava, N., Salakhutdinov, R.: Learning representations for multimodal data with deep belief nets. In: International Conference on Machine Learning Workshop, vol. 79 (2012)"},{"issue":"2\u20133","key":"8_CR21","first-page":"105","volume":"4","author":"A Thomaz","year":"2016","unstructured":"Thomaz, A., Hoffman, G., Cakmak, M., et al.: Computational human-robot interaction. Found. Trends\u00ae Robot. 4(2\u20133), 105\u2013223 (2016)","journal-title":"Found. Trends\u00ae Robot."},{"key":"8_CR22","volume-title":"Probabilistic Robotics","author":"S Thrun","year":"2005","unstructured":"Thrun, S., Burgard, W., Fox, D.: Probabilistic Robotics. MIT Press, Cambridge (2005)"},{"key":"8_CR23","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1016\/j.patrec.2013.07.003","volume":"36","author":"M Turk","year":"2014","unstructured":"Turk, M.: Multimodal interaction: a review. Pattern Recogn. Lett. 36, 189\u2013195 (2014)","journal-title":"Pattern Recogn. Lett."},{"key":"8_CR24","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Xu, H., Donahue, J., Rohrbach, M., Mooney, R., Saenko, K.: Translating videos to natural language using deep recurrent neural networks. arXiv preprint arXiv:1412.4729 (2014)","DOI":"10.3115\/v1\/N15-1173"},{"key":"8_CR25","doi-asserted-by":"publisher","first-page":"53","DOI":"10.3389\/frobt.2017.00053","volume":"4","author":"A Veronese","year":"2017","unstructured":"Veronese, A., Racca, M., Pieters, R.S., Kyrki, V.: Probabilistic mapping of human visual attention from head pose estimation. Front. Robot. AI 4, 53 (2017)","journal-title":"Front. Robot. AI"},{"key":"8_CR26","doi-asserted-by":"crossref","unstructured":"Whitney, D., Eldon, M., Oberlin, J., Tellex, S.: Interpreting multimodal referring expressions in real time. In: 2016 IEEE International Conference on Robotics and Automation (ICRA), pp. 3331\u20133338. IEEE (2016)","DOI":"10.1109\/ICRA.2016.7487507"},{"key":"8_CR27","doi-asserted-by":"crossref","unstructured":"Yao, L., et al.: Describing videos by exploiting temporal structure. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4507\u20134515 (2015)","DOI":"10.1109\/ICCV.2015.512"}],"container-title":["Lecture Notes in Computer Science","Virtual, Augmented and Mixed Reality. Applications and Case Studies"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-21565-1_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,19]],"date-time":"2023-07-19T00:06:00Z","timestamp":1689725160000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-21565-1_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030215644","9783030215651"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-21565-1_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"8 June 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"HCII","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Human-Computer Interaction","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Orlando, FL","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 July 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"hcii2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2019.hci.international\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}