{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T21:17:06Z","timestamp":1757625426926,"version":"3.44.0"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032025470"},{"type":"electronic","value":"9783032025487"}],"license":[{"start":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:00:00Z","timestamp":1755820800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:00:00Z","timestamp":1755820800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-02548-7_10","type":"book-chapter","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T05:39:55Z","timestamp":1755754795000},"page":"109-120","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Beyond Static Emotions: Leveraging Multitask Learning to\u00a0Model Dynamics of\u00a0Dimensional Affect in\u00a0Speech"],"prefix":"10.1007","author":[{"given":"Yuxuan","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Hippolyte","family":"Fournier","sequence":"additional","affiliation":[]},{"given":"Ruslan","family":"Kalitvianski","sequence":"additional","affiliation":[]},{"given":"Marco","family":"Dinarelli","sequence":"additional","affiliation":[]},{"given":"Fabien","family":"Ringeval","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"issue":"6","key":"10_CR1","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1109\/MSP.2021.3106890","volume":"38","author":"S Alisamir","year":"2021","unstructured":"Alisamir, S., Ringeval, F.: On the evolution of speech representations for affective computing: a brief history and critical overview. IEEE Signal Process. Mag. 38(6), 12\u201321 (2021)","journal-title":"IEEE Signal Process. Mag."},{"key":"10_CR2","unstructured":"Alisamir, S., Ringeval, F., Portet, F.: Dynamic time-alignment of dimensional annotations of emotion using recurrent neural networks. arXiv e-prints arXiv:2209.10223 (2022)"},{"key":"10_CR3","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: Wav2vec 2.0: a framework for self-supervised learning of speech representations. In: Advances in Neural Information Processing Systems, vol.\u00a033, pp. 12449\u201312460 (2020)"},{"issue":"2","key":"10_CR4","doi-asserted-by":"publisher","first-page":"432","DOI":"10.5465\/amj.2010.0894","volume":"56","author":"R Bledow","year":"2013","unstructured":"Bledow, R., Rosing, K., Frese, M.: A dynamic perspective on affect and creativity. Acad. Manag. J. 56(2), 432\u2013450 (2013)","journal-title":"Acad. Manag. J."},{"key":"10_CR5","unstructured":"Chung, J., Gulcehre, C., Cho, K., Bengio, Y.: Empirical evaluation of gated recurrent neural networks on sequence modeling. In: NIPS Workshop on Deep Learning (2014)"},{"key":"10_CR6","doi-asserted-by":"crossref","unstructured":"Conneau, A., Baevski, A., Collobert, R., Mohamed, A., Auli, M.: Unsupervised Cross-Lingual Representation Learning for Speech Recognition. In: Proceedings INTERSPEECH, pp. 2426\u20132430. ISCA, Brno (2021)","DOI":"10.21437\/Interspeech.2021-329"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Dang, T., Dimitriadis, A., Wu, J., Sethu, V., Ambikairajah, E.: Constrained dynamical neural ode for time series modelling: a case study on continuous emotion prediction. In: ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.\u00a01\u20135 (2023)","DOI":"10.1109\/ICASSP49357.2023.10095778"},{"key":"10_CR8","unstructured":"Duquenne, P.A., Schwenk, H., Sagot, B.: Sonar: sentence-level multimodal and language-agnostic representations. arXiv e-prints, pp. arXiv\u20132308 (2023)"},{"key":"10_CR9","doi-asserted-by":"crossref","unstructured":"Dutta, S., Ganapathy, S.: Multimodal transformer with learnable frontend and self attention for emotion recognition. In: ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6917\u20136921. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9747723"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Evain, S., et al.: LeBenchmark: a reproducible framework for assessing self-supervised representation learning from speech. In: Proceedings INTERSPEECH, pp. 1439\u20131443. ISCA, Brno (2021)","DOI":"10.21437\/Interspeech.2021-556"},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Ghosal, D., Majumder, N., Poria, S., Chhaya, N., Gelbukh, A.: DialogueGCN: a graph convolutional neural network for emotion recognition in conversation. In: Inui, K., Jiang, J., Ng, V., Wan, X. (eds.) Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 154\u2013164. ACL, Hong Kong (2019)","DOI":"10.18653\/v1\/D19-1015"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Graves, A.: Long short-term memory. In: Supervised Sequence Labelling with Recurrent Neural Networks, pp. 37\u201345 (2012)","DOI":"10.1007\/978-3-642-24797-2_4"},{"key":"10_CR13","doi-asserted-by":"crossref","unstructured":"Hsu, J.H., Wu, C.H., Wei, Y.H.: Speech emotion recognition using decomposed speech via multi-task learning. In: Proceedings INTERSPEECH, pp. 4553\u20134557. ISCA, Dublin (2023)","DOI":"10.21437\/Interspeech.2023-396"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Jhin, S.Y., Kim, S., Park, N.: Addressing prediction delays in time series forecasting: a continuous GRU approach with derivative regularization. In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 1234\u20131245. ACM, New York (2024)","DOI":"10.1145\/3637528.3671969"},{"issue":"6","key":"10_CR15","doi-asserted-by":"publisher","first-page":"e1003094","DOI":"10.1371\/journal.pcbi.1003094","volume":"9","author":"M Joffily","year":"2013","unstructured":"Joffily, M., Coricelli, G.: Emotional valence and the free-energy principle. PLoS Comput. Biol. 9(6), e1003094 (2013)","journal-title":"PLoS Comput. Biol."},{"issue":"4","key":"10_CR16","doi-asserted-by":"publisher","first-page":"1069","DOI":"10.1109\/TAFFC.2019.2917047","volume":"12","author":"S Khorram","year":"2021","unstructured":"Khorram, S., McInnis, M.G., Provost, E.M.: Jointly aligning and predicting continuous emotion annotations. IEEE Trans. Affect. Comput. 12(4), 1069\u20131083 (2021)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"10_CR17","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2017)"},{"issue":"3","key":"10_CR18","doi-asserted-by":"publisher","first-page":"1022","DOI":"10.1109\/TPAMI.2019.2944808","volume":"43","author":"J Kossaifi","year":"2021","unstructured":"Kossaifi, J., et al.: SEWA DB: a rich database for audio-visual emotion and sentiment research in the wild. IEEE Trans. Pattern Anal. Mach. Intell. 43(3), 1022\u20131040 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"4","key":"10_CR19","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1177\/1754073915590947","volume":"7","author":"P Kuppens","year":"2015","unstructured":"Kuppens, P.: It\u2019s about time: a special section on affect dynamics. Emot. Rev. 7(4), 297\u2013300 (2015)","journal-title":"Emot. Rev."},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Lim, W., Jang, D., Lee, T.: Speech emotion recognition using convolutional and recurrent neural networks. In: 2016 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA), pp.\u00a01\u20134. IEEE (2016)","DOI":"10.1109\/APSIPA.2016.7820699"},{"issue":"1","key":"10_CR21","doi-asserted-by":"publisher","first-page":"255","DOI":"10.2307\/2532051","volume":"45","author":"LI Lin","year":"1989","unstructured":"Lin, L.I.: A concordance correlation coefficient to evaluate reproducibility. Biometrics 45(1), 255\u2013268 (1989)","journal-title":"Biometrics"},{"issue":"6","key":"10_CR22","doi-asserted-by":"publisher","first-page":"3159","DOI":"10.1109\/TCSS.2022.3219825","volume":"10","author":"C Lu","year":"2022","unstructured":"Lu, C., et al.: Speech emotion recognition via an attentive time-frequency neural network. IEEE Trans. Comput. Soc. Syst. 10(6), 3159\u20133168 (2022)","journal-title":"IEEE Trans. Comput. Soc. Syst."},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Mirsamadi, S., Barsoum, E., Zhang, C.: Automatic speech emotion recognition using recurrent neural networks with local attention. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2227\u20132231. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"10_CR24","doi-asserted-by":"crossref","unstructured":"Parcollet, T., et al.: LeBenchmark 2.0: a standardized, replicable and enhanced framework for self-supervised representations of French speech. Comput. Speech Lang. 86, 101622 (2024)","DOI":"10.1016\/j.csl.2024.101622"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Parthasarathy, S., Busso, C.: Jointly predicting arousal, valence and dominance with multi-task learning. In: Proceedings INTERSPEECH, pp. 1103\u20131107. ISCA, Stokholm (2017)","DOI":"10.21437\/Interspeech.2017-1494"},{"issue":"3","key":"10_CR26","doi-asserted-by":"publisher","first-page":"360","DOI":"10.1109\/TBIOM.2022.3233083","volume":"5","author":"RG Praveen","year":"2023","unstructured":"Praveen, R.G., Cardinal, P., Granger, E.: Audio\u2013visual fusion for emotion recognition in the valence\u2013arousal space using joint cross-attention. IEEE Trans. Biomet. Behav. Identity Sci. 5(3), 360\u2013373 (2023)","journal-title":"IEEE Trans. Biomet. Behav. Identity Sci."},{"key":"10_CR27","doi-asserted-by":"crossref","unstructured":"Ringeval, F., et al.: AVEC 2019 Workshop and Challenge: State-of-Mind, Detecting Depression with AI, and Cross-Cultural Affect Recognition (2019)","DOI":"10.1145\/3347320.3357688"},{"key":"10_CR28","doi-asserted-by":"crossref","unstructured":"Ringeval, F., Sonderegger, A., Sauer, J., Lalanne, D.: Introducing the RECOLA multimodal corpus of remote collaborative and affective interactions. In: 10th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG), pp.\u00a01\u20138 (April 2013)","DOI":"10.1109\/FG.2013.6553805"},{"key":"10_CR29","first-page":"5","volume-title":"The Cambridge Handbook of Human Affective Neuroscience","author":"D Sander","year":"2013","unstructured":"Sander, D.: Models of emotion. In: Armony, J.L., Vuilleumier, P. (eds.) The Cambridge Handbook of Human Affective Neuroscience, pp. 5\u201356. Cambridge University Press, Cambridge (2013)"},{"issue":"1","key":"10_CR30","doi-asserted-by":"publisher","first-page":"719","DOI":"10.1146\/annurev-psych-122216-011854","volume":"70","author":"KR Scherer","year":"2019","unstructured":"Scherer, K.R., Moors, A.: The emotion process: event appraisal and component differentiation. Annu. Rev. Psychol. 70(1), 719\u2013745 (2019)","journal-title":"Annu. Rev. Psychol."},{"key":"10_CR31","doi-asserted-by":"crossref","unstructured":"Srinivasan, S., Huang, Z., Kirchhoff, K.: Representation learning through cross-modal conditional teacher-student training for speech emotion recognition. In: ICASSP 2022 - IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 6442\u20136446 (2022)","DOI":"10.1109\/ICASSP43922.2022.9747754"},{"key":"10_CR32","doi-asserted-by":"crossref","unstructured":"Tarantino, L., Garner, P.N., Lazaridis, A., et\u00a0al.: Self-attention for speech emotion recognition. In: Interspeech, pp. 2578\u20132582 (2019)","DOI":"10.21437\/Interspeech.2019-2822"},{"key":"10_CR33","doi-asserted-by":"crossref","unstructured":"Trigeorgis, G., et al.: Adieu features? End-to-end speech emotion recognition using a deep convolutional recurrent network. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5200\u20135204 (2016)","DOI":"10.1109\/ICASSP.2016.7472669"},{"issue":"4","key":"10_CR34","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1177\/1754073915590617","volume":"7","author":"TJ Trull","year":"2015","unstructured":"Trull, T.J., Lane, S.P., Koval, P., Ebner-Priemer, U.W.: Affective dynamics in psychopathology. Emot. Rev. 7(4), 355\u2013361 (2015)","journal-title":"Emot. Rev."},{"issue":"5","key":"10_CR35","doi-asserted-by":"publisher","first-page":"699","DOI":"10.1109\/TAI.2022.3149234","volume":"3","author":"G Tu","year":"2022","unstructured":"Tu, G., Wen, J., Liu, C., Jiang, D., Cambria, E.: Context- and sentiment-aware networks for emotion recognition in conversation. IEEE Trans. Artif. Intell. 3(5), 699\u2013708 (2022)","journal-title":"IEEE Trans. Artif. Intell."},{"key":"10_CR36","doi-asserted-by":"crossref","unstructured":"Tzirakis, P., Zhang, J., Schuller, B.W.: End-to-end speech emotion recognition using deep neural networks. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5089\u20135093 (2018)","DOI":"10.1109\/ICASSP.2018.8462677"},{"issue":"9","key":"10_CR37","doi-asserted-by":"publisher","first-page":"10745","DOI":"10.1109\/TPAMI.2023.3263585","volume":"45","author":"J Wagner","year":"2023","unstructured":"Wagner, J., et al.: Dawn of the transformer era in speech emotion recognition: closing the valence gap. IEEE Trans. Pattern Anal. Mach. Intell. 45(9), 10745\u201310759 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10_CR38","unstructured":"Wang, J., Mine, T.: Multi-task learning for emotion recognition in conversation with emotion shift. In: Proceedings of the 37th Pacific Asia Conference on Language, Information and Computation, pp. 257\u2013266. ACL, Hong Kong (2023)"},{"key":"10_CR39","doi-asserted-by":"crossref","unstructured":"Waugh, C.E., Kuppens, P.: Affect Dynamics. Springer (2021)","DOI":"10.1007\/978-3-030-82965-0"},{"key":"10_CR40","doi-asserted-by":"crossref","unstructured":"W\u00f6llmer, M., et al.: Abandoning emotion classes \u2013 towards continuous emotion recognition with modelling of long-range dependencies. In: Proceedings INTERSPEECH, pp. 597\u2013600. ISCA, Brisbane (2008)","DOI":"10.21437\/Interspeech.2008-192"}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-02548-7_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T18:05:06Z","timestamp":1757441106000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-02548-7_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,22]]},"ISBN":["9783032025470","9783032025487"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-02548-7_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,8,22]]},"assertion":[{"value":"22 August 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TSD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Text, Speech, and Dialogue","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Erlangen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tsd2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.kiv.zcu.cz\/tsd2025\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}