{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T03:21:52Z","timestamp":1773717712720,"version":"3.50.1"},"publisher-location":"Cham","reference-count":94,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031560262","type":"print"},{"value":"9783031560279","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-56027-9_12","type":"book-chapter","created":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T07:02:49Z","timestamp":1710831769000},"page":"191-209","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["Reading Between the\u00a0Frames: Multi-modal Depression Detection in\u00a0Videos from\u00a0Non-verbal Cues"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7375-9515","authenticated-orcid":false,"given":"David","family":"Gimeno-G\u00f3mez","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2433-8877","authenticated-orcid":false,"given":"Ana-Maria","family":"Bucur","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0307-2520","authenticated-orcid":false,"given":"Adrian","family":"Cosma","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6139-2891","authenticated-orcid":false,"given":"Carlos-David","family":"Mart\u00ednez-Hinarejos","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8922-1242","authenticated-orcid":false,"given":"Paolo","family":"Rosso","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,20]]},"reference":[{"key":"12_CR1","first-page":"25","volume":"33","author":"JB Alayrac","year":"2020","unstructured":"Alayrac, J.B., et al.: Self-supervised multimodal versatile networks. Adv. Neural. Inf. Process. Syst. 33, 25\u201337 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"12_CR2","unstructured":"Alghowinem, S., Goecke, R., Wagner, M., Epps, J., Breakspear, M., Parker, G.: From joyous to clinically depressed: mood detection using spontaneous speech. In: FLAIRS Conference, vol. 19, pp. 141\u2013146 (2012)"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"American Psychiatric Association: Diagnostic and statistical manual of mental disorders: DSM-5. Autor, Washington, DC, 5th edn (2013)","DOI":"10.1176\/appi.books.9780890425596"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Bailey, A., Plumbley, M.D.: Gender bias in depression detection using audio features. In: 2021 29th European Signal Processing Conference (EUSIPCO), pp. 596\u2013600. IEEE (2021)","DOI":"10.23919\/EUSIPCO54536.2021.9615933"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Baltru\u0161aitis, T., Robinson, P., Morency, L.P.: Openface: an open source facial behavior analysis toolkit. 
In: IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1\u201310 (2016)","DOI":"10.1109\/WACV.2016.7477553"},{"key":"12_CR6","unstructured":"Bazarevsky, V., Grishchenko, I., Raveendran, K., Zhu, T., Zhang, F., Grundmann, M.: Blazepose: on-device real-time body pose tracking. arXiv preprint arXiv:2006.10204 (2020)"},{"issue":"2","key":"12_CR7","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y Bengio","year":"1994","unstructured":"Bengio, Y., Simard, P., Frasconi, P.: Learning long-term dependencies with gradient descent is difficult. IEEE Trans. Neural Networks 5(2), 157\u2013166 (1994)","journal-title":"IEEE Trans. Neural Networks"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Bennabi, D., Vandel, P., Papaxanthis, C., Pozzo, T., Haffen, E.: Psychomotor retardation in depression: a systematic review of diagnostic, pathophysiologic, and therapeutic implications. BioMed Res. Int. 2013 (2013)","DOI":"10.1155\/2013\/158746"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Benton, A., Mitchell, M., Hovy, D.: Multitask learning for mental health conditions with limited social media data. In: Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers, Valencia, Spain, pp. 152\u2013162. Association for Computational Linguistics (2017)","DOI":"10.18653\/v1\/E17-1015"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Birnbaum, M.L., et al.: Identifying signals associated with psychiatric illness utilizing language and images posted to Facebook. NPJ Schizophrenia 6(1), 1\u201310 (2020)","DOI":"10.1038\/s41537-020-00125-0"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Bredin, H., Laurent, A.: End-to-end speaker segmentation for overlap-aware resegmentation. In: Proceedings of Interspeech, pp. 3111\u20133115 (2021)","DOI":"10.21437\/Interspeech.2021-560"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Bredin, H., et al.: Pyannote. Audio: neural building blocks for speaker diarization. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7124\u20137128 (2020)","DOI":"10.1109\/ICASSP40776.2020.9052974"},{"key":"12_CR13","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1007\/978-3-031-28244-7_13","volume-title":"Advances in Information Retrieval","author":"AM Bucur","year":"2023","unstructured":"Bucur, A.M., Cosma, A., Rosso, P., Dinu, L.P.: It\u2019s just a matter of time: detecting depression with time-enriched multimodal transformers. In: Kamps, J., et al. (eds.) ECIR 2023. LNCS, vol. 13980, pp. 200\u2013215. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-28244-7_13"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Bulat, A., Tzimiropoulos, G.: How far are we from solving the 2D & 3D face alignment problem? (and a dataset of 230,000 3D facial landmarks). In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1021\u20131030 (2017)","DOI":"10.1109\/ICCV.2017.116"},{"key":"12_CR15","doi-asserted-by":"publisher","first-page":"117822261879286","DOI":"10.1177\/1178222618792860","volume":"10","author":"G Coppersmith","year":"2018","unstructured":"Coppersmith, G., Leary, R., Crutchley, P., Fine, A.: Natural language processing of social media as screening for suicide risk. Biomed. Inform. Insights 10, 1178222618792860 (2018)","journal-title":"Biomed. Inform. 
Insights"},{"issue":"4","key":"12_CR16","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis, S., Mermelstein, P.: Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans. Acoust. Speech Signal Process. 28(4), 357\u2013366 (1980)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Degottex, G., Kane, J., Drugman, T., Raitio, T., Scherer, S.: Covarep - a collaborative voice analysis repository for speech technologies. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 960\u2013964 (2014)","DOI":"10.1109\/ICASSP.2014.6853739"},{"key":"12_CR18","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), Minneapolis, Minnesota, pp. 4171\u20134186. Association for Computational Linguistics (2019)"},{"issue":"2","key":"12_CR19","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1109\/JBHI.2017.2676878","volume":"22","author":"H Dibeklio\u011flu","year":"2017","unstructured":"Dibeklio\u011flu, H., Hammal, Z., Cohn, J.F.: Dynamic multimodal measurement of depression severity using deep autoencoding. IEEE J. Biomed. Health Inform. 22(2), 525\u2013536 (2017)","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"12_CR20","first-page":"21981","volume":"33","author":"C Doersch","year":"2020","unstructured":"Doersch, C., Gupta, A., Zisserman, A.: Crosstransformers: spatially-aware few-shot transfer. Adv. Neural. Inf. Process. Syst. 33, 21981\u201321993 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"2","key":"12_CR21","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TAFFC.2015.2457417","volume":"7","author":"F Eyben","year":"2016","unstructured":"Eyben, F., et al.: The Geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing. IEEE Trans. Affect. Comput. 7(2), 190\u2013202 (2016)","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"11","key":"12_CR22","doi-asserted-by":"publisher","first-page":"14470","DOI":"10.1007\/s10489-022-04216-6","volume":"53","author":"Z Fang","year":"2023","unstructured":"Fang, Z., Liu, Z., Hung, C.C., Sekhavat, Y.A., Liu, T., Wang, X.: Learning coordinated emotion representation between voice and face. Appl. Intell. 53(11), 14470\u201314492 (2023)","journal-title":"Appl. Intell."},{"key":"12_CR23","unstructured":"Fern\u00e1ndez-Barrera, I., Bravo-Bustos, S., Vidal, M.: Evaluating the social media users\u2019 mental health status during covid-19 pandemic using deep learning. In: International Conference on Biomedical and Health Informatics, vol. 14 (2022)"},{"issue":"3","key":"12_CR24","doi-asserted-by":"publisher","first-page":"1114","DOI":"10.1016\/j.jad.2013.05.002","volume":"150","author":"JT Fiquer","year":"2013","unstructured":"Fiquer, J.T., Boggio, P.S., Gorenstein, C.: Talking bodies: nonverbal behavior in the assessment of depression severity. J. Affect. Disord. 150(3), 1114\u20131119 (2013)","journal-title":"J. Affect. 
Disord."},{"key":"12_CR25","unstructured":"Friesen, E., Ekman, P.: Facial action coding system: a technique for the measurement of facial movement. Palo Alto University, California, vol. 3, no. 2, p. 5 (1978)"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Gales, M., Young, S.: The Application of Hidden Markov Models in Speech Recognition. Now Publishers Inc. (2008)","DOI":"10.1561\/9781601981219"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Girard, J.M., Cohn, J.F., Mahoor, M.H., Mavadati, S., Rosenwald, D.P.: Social risk and depression: evidence from manual and automatic facial expression analysis. In: 2013 10th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG), pp. 1\u20138. IEEE (2013)","DOI":"10.1109\/FG.2013.6553748"},{"key":"12_CR28","unstructured":"Gratch, J., et al.: The distress analysis interview corpus of human and computer interviews. In: LREC, pp. 3123\u20133128 (2014)"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Gui, T., et al.: Cooperative multimodal approach to depression detection in twitter. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 110\u2013117 (2019)","DOI":"10.1609\/aaai.v33i01.3301110"},{"issue":"2","key":"12_CR30","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1016\/j.imavis.2012.06.016","volume":"31","author":"H Gunes","year":"2013","unstructured":"Gunes, H., Schuller, B.: Categorical and dimensional affect analysis in continuous input: current trends and future directions. Image Vis. Comput. 31(2), 120\u2013136 (2013)","journal-title":"Image Vis. Comput."},{"key":"12_CR31","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1007\/978-3-030-86383-8_35","volume-title":"Artificial Neural Networks and Machine Learning - ICANN 2021","author":"A Haque","year":"2021","unstructured":"Haque, A., Reddi, V., Giallanza, T.: Deep learning for suicide and depression identification with unsupervised label correction. In: Farka\u0161, I., Masulli, P., Otte, S., Wermter, S. (eds.) ICANN 2021. LNCS, vol. 12895, pp. 436\u2013447. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86383-8_35"},{"key":"12_CR32","doi-asserted-by":"crossref","unstructured":"Harzig, P., Einfalt, M., Lienhart, R.: Synchronized audio-visual frames with fractional positional encoding for transformers in video-to-text translation. In: 2022 IEEE International Conference on Image Processing (ICIP), pp. 2041\u20132045 (2022)","DOI":"10.1109\/ICIP46576.2022.9897804"},{"key":"12_CR33","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"12_CR34","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.inffus.2021.10.012","volume":"80","author":"L He","year":"2022","unstructured":"He, L., et al.: Deep learning for depression recognition with audiovisual cues: a review. Inf. Fusion 80, 56\u201386 (2022)","journal-title":"Inf. Fusion"},{"key":"12_CR35","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: ICLR (2019)"},{"key":"12_CR36","unstructured":"Jaegle, A., Gimeno, F., Brock, A., Vinyals, O., Zisserman, A., Carreira, J.: Perceiver: general perception with iterative attention. In: International Conference on Machine Learning, pp. 4651\u20134664. 
PMLR (2021)"},{"issue":"2","key":"12_CR37","volume":"2","author":"R Kaushik","year":"2023","unstructured":"Kaushik, R., Gaur, S., Pandit, J.N., Satapathy, S., Behera, C.: Live streaming of suicide on Facebook. Psychiatry Res. Case Rep. 2(2), 100141 (2023)","journal-title":"Psychiatry Res. Case Rep."},{"key":"12_CR38","unstructured":"Lee, J., Lee, Y., Kim, J., Kosiorek, A., Choi, S., Teh, Y.W.: Set transformer: a framework for attention-based permutation-invariant neural networks. In: International Conference on Machine Learning, pp. 3744\u20133753. PMLR (2019)"},{"issue":"6","key":"12_CR39","doi-asserted-by":"publisher","DOI":"10.2196\/14199","volume":"21","author":"A Leis","year":"2019","unstructured":"Leis, A., Ronzano, F., Mayer, M.A., Furlong, L.I., Sanz, F.: Detecting signs of depression in tweets in Spanish: behavioral and linguistic analysis. J. Med. Internet Res. 21(6), e14199 (2019)","journal-title":"J. Med. Internet Res."},{"key":"12_CR40","doi-asserted-by":"crossref","unstructured":"Li, B., Xiong, P., Han, C., Guo, T.: Shrinking temporal attention in transformers for video action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 1263\u20131271 (2022)","DOI":"10.1609\/aaai.v36i2.20013"},{"key":"12_CR41","doi-asserted-by":"crossref","unstructured":"Lucas, G.M., Gratch, J., Scherer, S., Boberg, J., Stratou, G.: Towards an affective interface for assessment of psychological distress. In: 2015 International Conference on Affective Computing and Intelligent Interaction (ACII), pp. 539\u2013545. IEEE (2015)","DOI":"10.1109\/ACII.2015.7344622"},{"key":"12_CR42","doi-asserted-by":"crossref","unstructured":"Ma, X., Yang, H., Chen, Q., Huang, D., Wang, Y.: Depaudionet: an efficient deep model for audio based depression classification. In: Proceedings of the 6th International Workshop on Audio\/Visual Emotion Challenge, AVEC 2016, pp. 35\u201342. Association for Computing Machinery (2016)","DOI":"10.1145\/2988257.2988267"},{"issue":"1","key":"12_CR43","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1192\/bjp.143.1.55","volume":"143","author":"J Mackintosh","year":"1983","unstructured":"Mackintosh, J., Kumar, R., Kitamura, T.: Blink rate in psychiatric illness. Br. J. Psychiatry 143(1), 55\u201357 (1983)","journal-title":"Br. J. Psychiatry"},{"key":"12_CR44","unstructured":"Nguyen, D.K., et al.: Multimodal transformer for automatic depression estimation system. In: The 29th International Workshop on Frontiers of Computer Vision (2023)"},{"key":"12_CR45","doi-asserted-by":"crossref","unstructured":"Oureshi, S.A., Dias, G., Saha, S., Hasanuzzaman, M.: Gender-aware estimation of depression severity level in a multimodal setting. In: 2021 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20138. IEEE (2021)","DOI":"10.1109\/IJCNN52387.2021.9534330"},{"key":"12_CR46","doi-asserted-by":"crossref","unstructured":"Pampouchidou, A., et al.: Depression assessment by fusing high and low level features from audio, video, and text. In: Proceedings of the 6th International Workshop on Audio\/Visual Emotion Challenge, pp. 27\u201334 (2016)","DOI":"10.1145\/2988257.2988266"},{"issue":"4","key":"12_CR47","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1109\/TAFFC.2017.2724035","volume":"10","author":"A Pampouchidou","year":"2017","unstructured":"Pampouchidou, A., et al.: Automatic assessment of depression based on visual cues: a systematic review. IEEE Trans. Affect. Comput. 10(4), 445\u2013470 (2017)","journal-title":"IEEE Trans. 
Affect. Comput."},{"key":"12_CR48","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: On the difficulty of training recurrent neural networks. In: International Conference on Machine Learning, pp. 1310\u20131318. PMLR (2013)"},{"key":"12_CR49","doi-asserted-by":"crossref","unstructured":"Pascual, S., Ravanelli, M., Serr\u00e0, J., Bonafonte, A., Bengio, Y.: Learning problem-agnostic speech representations from multiple self-supervised tasks. In: Proceedings of Interspeech, pp. 161\u2013165 (2019)","DOI":"10.21437\/Interspeech.2019-2605"},{"key":"12_CR50","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1016\/j.jad.2021.05.055","volume":"292","author":"A Pavlidou","year":"2021","unstructured":"Pavlidou, A., et al.: Hand gesture performance is impaired in major depressive disorder: a matter of working memory performance? J. Affect. Disord. 292, 81\u201388 (2021)","journal-title":"J. Affect. Disord."},{"key":"12_CR51","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1007\/978-3-031-28241-6_30","volume-title":"Advances in Information Retrieval","author":"A P\u00e9rez","year":"2023","unstructured":"P\u00e9rez, A., Piot-P\u00e9rez-Abad\u00edn, P., Parapar, J., Barreiro, \u00c1.: Psyprof: a platform for assisted screening of depression in social media. In: Kamps, J., et al. (eds.) ECIR 2023. LNCS, vol. 13982, pp. 300\u2013306. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-28241-6_30"},{"key":"12_CR52","doi-asserted-by":"crossref","unstructured":"Pirina, I., \u00c7\u00f6ltekin, \u00c7.: Identifying depression on Reddit: the effect of training data. In: Proceedings of the 2018 EMNLP Workshop SMM4H: The 3rd Social Media Mining for Health Applications Workshop & Shared Task, pp. 9\u201312 (2018)","DOI":"10.18653\/v1\/W18-5903"},{"key":"12_CR53","doi-asserted-by":"crossref","unstructured":"Ravanelli, M., et al.: Multi-task self-supervised learning for robust speech recognition. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6989\u20136993 (2020)","DOI":"10.1109\/ICASSP40776.2020.9053569"},{"key":"12_CR54","doi-asserted-by":"crossref","unstructured":"Ray, A., Kumar, S., Reddy, R., Mukherjee, P., Garg, R.: Multi-level attention network using text, audio and video for depression prediction. In: Proceedings of the 9th International on Audio\/Visual Emotion Challenge and Workshop, pp. 81\u201388 (2019)","DOI":"10.1145\/3347320.3357697"},{"issue":"1","key":"12_CR55","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1140\/epjds\/s13688-017-0110-z","volume":"6","author":"AG Reece","year":"2017","unstructured":"Reece, A.G., Danforth, C.M.: Instagram photos reveal predictive markers of depression. EPJ Data Sci. 6(1), 15 (2017)","journal-title":"EPJ Data Sci."},{"issue":"3","key":"12_CR56","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1016\/j.jbtep.2005.05.002","volume":"36","author":"B Renneberg","year":"2005","unstructured":"Renneberg, B., Heyn, K., Gebhard, R., Bachmann, S.: Facial expression of emotions in borderline personality disorder and depression. J. Behav. Ther. Exp. Psychiatry 36(3), 183\u2013196 (2005)","journal-title":"J. Behav. Ther. Exp. Psychiatry"},{"key":"12_CR57","doi-asserted-by":"crossref","unstructured":"Ringeval, F., et al.: AVEC 2019 workshop and challenge: state-of-mind, detecting depression with AI, and cross-cultural affect recognition. In: Proceedings of the 9th International on Audio\/Visual Emotion Challenge and Workshop, pp. 
3\u201312 (2019)","DOI":"10.1145\/3347320.3357688"},{"key":"12_CR58","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"296","DOI":"10.1007\/978-3-030-45439-5_20","volume-title":"Advances in Information Retrieval","author":"EA R\u00edssola","year":"2020","unstructured":"R\u00edssola, E.A., Aliannejadi, M., Crestani, F.: Beyond modelling: understanding mental disorders in online social media. In: Jose, J.M., et al. (eds.) ECIR 2020. LNCS, vol. 12035, pp. 296\u2013310. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-45439-5_20"},{"key":"12_CR59","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/978-0-387-29986-0_8","volume-title":"Emotion Regulation: Conceptual and Clinical Issues","author":"J Rottenberg","year":"2008","unstructured":"Rottenberg, J., Vaughan, C.: Emotion expression in depression: emerging evidence for emotion context-insensitivity. In: Vingerhoets, A.J., Nykl\u00ed\u010dek, I., Denollet, J. (eds.) Emotion Regulation: Conceptual and Clinical Issues, pp. 125\u2013139. Springer, Boston (2008). https:\/\/doi.org\/10.1007\/978-0-387-29986-0_8"},{"issue":"6","key":"12_CR60","doi-asserted-by":"publisher","first-page":"1161","DOI":"10.1037\/h0077714","volume":"39","author":"JA Russell","year":"1980","unstructured":"Russell, J.A.: A circumplex model of affect. J. Pers. Soc. Psychol. 39(6), 1161 (1980)","journal-title":"J. Pers. Soc. Psychol."},{"issue":"1","key":"12_CR61","first-page":"24","volume":"15","author":"GS Saggu","year":"2022","unstructured":"Saggu, G.S., Gupta, K., Arya, K., Rodriguez, C.R.: Depressnet: a multimodal hierarchical attention mechanism approach for depression detection. Int. J. Eng. Sci. 15(1), 24\u201332 (2022)","journal-title":"Int. J. Eng. Sci."},{"key":"12_CR62","doi-asserted-by":"crossref","unstructured":"Shen, G., et al.: Depression detection via harvesting social media: a multimodal dictionary learning solution. In: Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence. International Joint Conferences on Artificial Intelligence Organization (2017)","DOI":"10.24963\/ijcai.2017\/536"},{"issue":"7","key":"12_CR63","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3543848","volume":"55","author":"WC Sleeman IV","year":"2022","unstructured":"Sleeman, W.C., IV., Kapoor, R., Ghosh, P.: Multimodal classification: current landscape, taxonomy and future directions. ACM Comput. Surv. 55(7), 1\u201331 (2022)","journal-title":"ACM Comput. Surv."},{"key":"12_CR64","doi-asserted-by":"crossref","unstructured":"Song, S., Shen, L., Valstar, M.: Human behaviour-based automatic depression analysis using hand-crafted statistics and deep learned spectral features. In: 13th IEEE International Conference on Automatic Face & Gesture Recognition (FG), pp. 158\u2013165 (2018)","DOI":"10.1109\/FG.2018.00032"},{"key":"12_CR65","unstructured":"Su, Y., Lan, T., Li, H., Xu, J., Wang, Y., Cai, D.: PandaGPT: one model to instruction-follow them all. arXiv preprint arXiv:2305.16355 (2023)"},{"key":"12_CR66","unstructured":"Sundararajan, M., Taly, A., Yan, Q.: Axiomatic attribution for deep networks. In: International Conference on Machine Learning, pp. 3319\u20133328. PMLR (2017)"},{"key":"12_CR67","doi-asserted-by":"crossref","unstructured":"Tao, Y., Yang, M., Wu, Y., Lee, K., Kline, A., Hu, B.: Depressive semantic awareness from vlog facial and vocal streams via spatio-temporal transformer. Digit. Commun. Netw. 
(2023)","DOI":"10.1016\/j.dcan.2023.03.007"},{"issue":"1","key":"12_CR68","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1176\/appi.ajp.163.1.73","volume":"163","author":"BP Taylor","year":"2006","unstructured":"Taylor, B.P., et al.: Psychomotor slowing as a predictor of fluoxetine nonresponse in depressed outpatients. Am. J. Psychiatry 163(1), 73\u201378 (2006)","journal-title":"Am. J. Psychiatry"},{"issue":"1","key":"12_CR69","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1038\/s42256-020-00280-0","volume":"3","author":"A Toisoul","year":"2021","unstructured":"Toisoul, A., Kossaifi, J., Bulat, A., Tzimiropoulos, G., Pantic, M.: Estimation of continuous valence and arousal levels from faces in naturalistic conditions. Nat. Mach. Intell. 3(1), 42\u201350 (2021)","journal-title":"Nat. Mach. Intell."},{"key":"12_CR70","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"402","DOI":"10.1007\/978-3-030-45442-5_50","volume-title":"Advances in Information Retrieval","author":"A Trifan","year":"2020","unstructured":"Trifan, A., Antunes, R., Matos, S., Oliveira, J.L.: Understanding depression from psycholinguistic patterns in social media texts. In: Jose, J.M., et al. (eds.) ECIR 2020. LNCS, vol. 12036, pp. 402\u2013409. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-45442-5_50"},{"key":"12_CR71","doi-asserted-by":"crossref","unstructured":"Valstar, M., et al.: AVEC 2013: the continuous audio\/visual emotion and depression recognition challenge. In: Proceedings of the 3rd ACM International Workshop on Audio\/Visual Emotion Challenge, pp. 3\u201310 (2013)","DOI":"10.1145\/2512530.2512533"},{"key":"12_CR72","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS, vol. 30, pp. 6000\u20136010 (2017)"},{"key":"12_CR73","doi-asserted-by":"crossref","unstructured":"Villatoro-Tello, E., Ram\u00edrez-de-la Rosa, G., G\u00e1tica-P\u00e9rez, D., Magimai.-Doss, M., Jim\u00e9nez-Salazar, H.: Approximating the mental lexicon from clinical interviews as a support tool for depression detection. In: Proceedings of the 2021 International Conference on Multimodal Interaction, pp. 557\u2013566 (2021)","DOI":"10.1145\/3462244.3479896"},{"key":"12_CR74","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12888-019-2300-7","volume":"19","author":"J Wang","year":"2019","unstructured":"Wang, J., Zhang, L., Liu, T., Pan, W., Hu, B., Zhu, T.: Acoustic differences between healthy and depressed people: a cross-situation study. BMC Psychiatry 19, 1\u201312 (2019)","journal-title":"BMC Psychiatry"},{"key":"12_CR75","doi-asserted-by":"crossref","unstructured":"Wang, Y.A., Chen, Y.N.: What do position embeddings learn? An empirical study of pre-trained language model positional encoding. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 6840\u20136849 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.555"},{"key":"12_CR76","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"623","DOI":"10.1007\/978-3-031-25075-0_42","volume-title":"Computer Vision - ECCV Workshops","author":"PC Wei","year":"2023","unstructured":"Wei, P.C., Peng, K., Roitberg, A., Yang, K., Zhang, J., Stiefelhagen, R.: Multi-modal depression estimation based on sub-attentional fusion. In: Karlinsky, L., Michaeli, T., Nishino, K. (eds.) ECCV 2022. LNCS, vol. 13806, pp. 623\u2013639. Springer, Cham (2023). 
https:\/\/doi.org\/10.1007\/978-3-031-25075-0_42"},{"key":"12_CR77","doi-asserted-by":"crossref","unstructured":"Williamson, J.R., et al.: Detecting depression using vocal, facial and semantic communication cues. In: Proceedings of the 6th International Workshop on Audio\/Visual Emotion Challenge, AVEC 2016, pp. 11\u201318. Association for Computing Machinery (2016)","DOI":"10.1145\/2988257.2988263"},{"key":"12_CR78","doi-asserted-by":"crossref","unstructured":"Williamson, J.R., Quatieri, T.F., Helfer, B.S., Ciccarelli, G., Mehta, D.D.: Vocal and facial biomarkers of depression based on motor incoordination and timing. In: Proceedings of the 4th International Workshop on Audio\/Visual Emotion Challenge, pp. 65\u201372 (2014)","DOI":"10.1145\/2661806.2661809"},{"key":"12_CR79","unstructured":"Wolohan, J., Hiraga, M., Mukherjee, A., Sayyed, Z.A., Millard, M.: Detecting linguistic traces of depression in topic-restricted text: attending to self-stigmatized depression with NLP. In: Proceedings of the First International Workshop on Language Cognition and Computational Models, pp. 11\u201321. Association for Computational Linguistics (2018)"},{"key":"12_CR80","doi-asserted-by":"crossref","unstructured":"Xu, H., Bazavan, E.G., Zanfir, A., Freeman, W.T., Sukthankar, R., Sminchisescu, C.: GHUM & GHUML: generative 3D human shape and articulated pose models. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6183\u20136192 (2020)","DOI":"10.1109\/CVPR42600.2020.00622"},{"key":"12_CR81","unstructured":"Xu, X., et al.: Leveraging large language models for mental health prediction via online text data (2023)"},{"key":"12_CR82","doi-asserted-by":"crossref","unstructured":"Yadav, S., Caragea, C., Zhao, C., Kumari, N., Solberg, M., Sharma, T.: Towards identifying fine-grained depression symptoms from memes. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics, pp. 8890\u20138905 (2023)","DOI":"10.18653\/v1\/2023.acl-long.495"},{"issue":"9","key":"12_CR83","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0238726","volume":"15","author":"M Yamamoto","year":"2020","unstructured":"Yamamoto, M., et al.: Using speech recognition technology to investigate the association between timing-related speech features and depression severity. PLoS ONE 15(9), e0238726 (2020)","journal-title":"PLoS ONE"},{"key":"12_CR84","doi-asserted-by":"crossref","unstructured":"Yang, K., Zhang, T., Kuang, Z., Xie, Q., Ananiadou, S.: Mentalllama: interpretable mental health analysis on social media with large language models. arXiv preprint arXiv:2309.13567 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.370"},{"key":"12_CR85","doi-asserted-by":"crossref","unstructured":"Yates, A., Cohan, A., Goharian, N.: Depression and self-harm risk assessment in online forums. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pp. 2968\u20132978. Association for Computational Linguistics (2017)","DOI":"10.18653\/v1\/D17-1322"},{"key":"12_CR86","doi-asserted-by":"crossref","unstructured":"Yoon, J., Kang, C., Kim, S., Han, J.: D-vlog: multimodal vlog dataset for depression detection. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 12226\u201312234 (2022)","DOI":"10.1609\/aaai.v36i11.21483"},{"key":"12_CR87","doi-asserted-by":"crossref","unstructured":"Zanwar, S., Wiechmann, D., Qiao, Y., Kerz, E.: SMHD-GER: a large-scale benchmark dataset for automatic mental health detection from social media in German. 
In: Findings of the Association for Computational Linguistics: EACL 2023, Dubrovnik, Croatia, pp. 1526\u20131541. Association for Computational Linguistics (2023)","DOI":"10.18653\/v1\/2023.findings-eacl.113"},{"key":"12_CR88","doi-asserted-by":"crossref","unstructured":"Zeng, W., et al.: Real-time multi-person eyeblink detection in the wild for untrimmed video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13854\u201313863 (2023)","DOI":"10.1109\/CVPR52729.2023.01331"},{"key":"12_CR89","doi-asserted-by":"crossref","unstructured":"Zhang, X., Park, S., Beeler, T., Bradley, D., Tang, S., Hilliges, O.: ETH-XGaze: a large scale dataset for gaze estimation under extreme head pose and gaze variation. In: European Conference on Computer Vision (ECCV), pp. 365\u2013381 (2020)","DOI":"10.1007\/978-3-030-58558-7_22"},{"key":"12_CR90","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Associations between depression symptom severity and daily-life gait characteristics derived from long-term acceleration signals in real-world settings: retrospective analysis. JMIR mHealth uHealth 10(10), e40667 (2022)","DOI":"10.2196\/40667"},{"key":"12_CR91","doi-asserted-by":"crossref","unstructured":"Zheng, W., Yan, L., Wang, F.Y.: Two birds with one stone: knowledge-embedded temporal convolutional transformer for depression detection and emotion recognition. IEEE Trans. Affect. Comput. 1\u201318 (2023)","DOI":"10.1109\/TAFFC.2023.3282704"},{"key":"12_CR92","doi-asserted-by":"publisher","first-page":"669","DOI":"10.1109\/TNSRE.2022.3224135","volume":"31","author":"L Zhou","year":"2022","unstructured":"Zhou, L., Liu, Z., Shangguan, Z., Yuan, X., Li, Y., Hu, B.: TAMFN: time-aware attention multimodal fusion network for depression detection. IEEE Trans. Neural Syst. Rehabil. Eng. 31, 669\u2013679 (2022)","journal-title":"IEEE Trans. Neural Syst. Rehabil. Eng."},{"key":"12_CR93","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2023.103986","volume":"137","author":"L Zhou","year":"2023","unstructured":"Zhou, L., Liu, Z., Yuan, X., Shangguan, Z., Li, Y., Hu, B.: CAIINET: neural network based on contextual attention and information interaction mechanism for depression detection. Digit. Signal Process. 137, 103986 (2023)","journal-title":"Digit. 
Signal Process."},{"key":"12_CR94","unstructured":"Zhu, D., Chen, J., Shen, X., Li, X., Elhoseiny, M.: MiniGPT-4: enhancing vision-language understanding with advanced large language models (2023)"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-56027-9_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T07:07:36Z","timestamp":1710832056000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-56027-9_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031560262","9783031560279"],"references-count":94,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-56027-9_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"20 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 March 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 March 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.ecir2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"578","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"110","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the 
conference organizers)"}},{"value":"69","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31 (Tracks: Workshop, Tutorial, Industry, Doctoral Consortium)","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}