{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T15:19:02Z","timestamp":1774365542756,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":92,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"funder":[{"name":"Ministry of Science, ICT","award":["RS-2024-00425354,IITP-2024-RS-2023-00254129"],"award-info":[{"award-number":["RS-2024-00425354,IITP-2024-RS-2023-00254129"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679797","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:21Z","timestamp":1729452861000},"page":"1049-1059","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":17,"title":["HiQuE: Hierarchical Question Embedding Network for Multimodal Depression Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-3960-4184","authenticated-orcid":false,"given":"Juho","family":"Jung","sequence":"first","affiliation":[{"name":"Dept. of Applied Artificial Intelligence, Sungkyunkwan University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5255-540X","authenticated-orcid":false,"given":"Chaewon","family":"Kang","sequence":"additional","affiliation":[{"name":"Dept. of Applied Artificial Intelligence, Sungkyunkwan University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9067-8653","authenticated-orcid":false,"given":"Jeewoo","family":"Yoon","sequence":"additional","affiliation":[{"name":"Raondata, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5667-3560","authenticated-orcid":false,"given":"Seungbae","family":"Kim","sequence":"additional","affiliation":[{"name":"Computer Science &amp; Engineering, University of South Florida, Tampa, FL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8911-2791","authenticated-orcid":false,"given":"Jinyoung","family":"Han","sequence":"additional","affiliation":[{"name":"Dept. of Applied Artificial Intelligence, Sungkyunkwan University, Seoul, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Osdi","volume":"16","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, et al. 2016. Tensorflow: a system for large-scale machine learning.. In Osdi, Vol. 16. Savannah, GA, USA, 265--283."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Tuka Al Hanai Mohammad M Ghassemi and James R Glass. 2018. Detecting Depression with Audio\/Text Sequence Modeling of Interviews.. In Interspeech. 1716--1720.","DOI":"10.21437\/Interspeech.2018-2522"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1177\/2167702617747074"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2016.2634527"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2013.53"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0165-0327(00)00335-9"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO54536.2021.9615933"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.54"},{"key":"e_1_3_2_1_9_1","volume-title":"Voice acoustical measurement of the severity of major depression. Brain and cognition 56, 1","author":"Cannizzaro Michael","year":"2004","unstructured":"Michael Cannizzaro, Brian Harel, Nicole Reilly, Phillip Chappell, and Peter J Snyder. 2004. Voice acoustical measurement of the severity of major depression. Brain and cognition 56, 1 (2004), 30--35."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1566"},{"key":"e_1_3_2_1_11_1","volume-title":"Speechformer: A hierarchical efficient framework incorporating the characteristics of speech. arXiv preprint arXiv:2203.03812","author":"Chen Weidong","year":"2022","unstructured":"Weidong Chen, Xiaofen Xing, Xiangmin Xu, Jianxin Pang, and Lan Du. 2022. Speechformer: A hierarchical efficient framework incorporating the characteristics of speech. arXiv preprint arXiv:2203.03812 (2022)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2021.104499"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2512530.2512535"},{"key":"e_1_3_2_1_14_1","volume-title":"A review of depression and suicide risk assessment using speech analysis. Speech communication 71","author":"Cummins Nicholas","year":"2015","unstructured":"Nicholas Cummins, Stefan Scherer, Jarek Krajewski, Sebastian Schnieder, Julien Epps, and Thomas F Quatieri. 2015. A review of depression and suicide risk assessment using speech analysis. Speech communication 71 (2015), 10--49."},{"key":"e_1_3_2_1_15_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_16_1","volume-title":"Depression scale recognition from audio, visual and text analysis. arXiv preprint arXiv:1709.05865","author":"Dham Shubham","year":"2017","unstructured":"Shubham Dham, Anirudh Sharma, and Abhinav Dhall. 2017. Depression scale recognition from audio, visual and text analysis. arXiv preprint arXiv:1709.05865 (2017)."},{"key":"e_1_3_2_1_17_1","volume-title":"Dynamic multimodal measurement of depression severity using deep autoencoding","author":"Dibeklio'lu Hamdi","year":"2017","unstructured":"Hamdi Dibeklio'lu, Zakia Hammal, and Jeffrey F Cohn. 2017. Dynamic multimodal measurement of depression severity using deep autoencoding. IEEE journal of biomedical and health informatics 22, 2 (2017), 525--536."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2820776"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.02.019"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF02253071"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Florian Eyben Klaus R Scherer Bj\u00f6rn W Schuller Johan Sundberg Elisabeth Andr\u00e9 Carlos Busso Laurence Y Devillers Julien Epps Petri Laukka Shrikanth S Narayanan et al. 2015. The Geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing. IEEE transactions on affective computing 7 2 (2015) 190--202.","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2022.104561"},{"key":"e_1_3_2_1_24_1","volume-title":"Deep Learning Applications","author":"Flores Ricardo","unstructured":"Ricardo Flores, ML Tlachac, Ermal Toto, and Elke Rundensteiner. 2022. Transfer learning for depression screening from follow-up clinical interview questions. In Deep Learning Applications, Volume 4. Springer, 53--78."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA52953.2021.00099"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1382"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133944.3133945"},{"key":"e_1_3_2_1_28_1","unstructured":"Jonathan Gratch Ron Artstein Gale Lucas Giota Stratou Stefan Scherer Angela Nazarian Rachel Wood Jill Boberg David DeVault Stacy Marsella et al. 2014. The distress analysis interview corpus of human and computer interviews. Technical Report. University of Southern California Los Angeles."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1159\/000317532"},{"key":"e_1_3_2_1_30_1","volume-title":"Conformer: Convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100","author":"Gulati Anmol","year":"2020","unstructured":"Anmol Gulati, James Qin, Chung-Cheng Chiu, Niki Parmar, Yu Zhang, Jiahui Yu, Wei Han, Shibo Wang, Zhengdong Zhang, Yonghui Wu, et al. 2020. Conformer: Convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100 (2020)."},{"key":"e_1_3_2_1_31_1","volume-title":"Automatic depression detection via learning and fusing features from visual cues","author":"Guo Yanrong","year":"2022","unstructured":"Yanrong Guo, Chenyang Zhu, Shijie Hao, and Richang Hong. 2022. Automatic depression detection via learning and fusing features from visual cues. IEEE Transactions on Computational Social Systems (2022)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.im.2020.103349"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2661806.2661810"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i14.17534"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413678"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Emil Kraepelin. 1921. Manic-depressive insanity and paranoia. E. & S. Livingstone.","DOI":"10.1097\/00005053-192104000-00057"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683027"},{"key":"e_1_3_2_1_38_1","volume-title":"Learning Co-Speech Gesture for Multimodal Aphasia Type Detection. arXiv preprint arXiv:2310.11710","author":"Lee Daeun","year":"2023","unstructured":"Daeun Lee, Sejung Son, Hyolim Jeon, Seungbae Kim, and Jinyoung Han. 2023. Learning Co-Speech Gesture for Multimodal Aphasia Type Detection. arXiv preprint arXiv:2310.11710 (2023)."},{"key":"e_1_3_2_1_39_1","volume-title":"Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461","author":"Lewis Mike","year":"2019","unstructured":"Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov, and Luke Zettlemoyer. 2019. Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461 (2019)."},{"key":"e_1_3_2_1_40_1","first-page":"326","article-title":"Eye movement indices in the study of depressive disorder","volume":"28","author":"Li Yu","year":"2016","unstructured":"Yu Li, Yangyang Xu, Mengqing Xia, Tianhong Zhang, Junjie Wang, Xu Liu, Yongguang He, and Jijun Wang. 2016. Eye movement indices in the study of depressive disorder. Shanghai Archives of Psychiatry 28, 6 (2016), 326.","journal-title":"Shanghai Archives of Psychiatry"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.3390\/app10238701"},{"key":"e_1_3_2_1_42_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988257.2988267"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016818"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019--2036"},{"key":"e_1_3_2_1_46_1","volume-title":"Clinician-identified depression in community settings: concordance with structured-interview diagnoses. Psychotherapy and psychosomatics 82, 3","author":"Mojtabai Ramin","year":"2013","unstructured":"Ramin Mojtabai. 2013. Clinician-identified depression in community settings: concordance with structured-interview diagnoses. Psychotherapy and psychosomatics 82, 3 (2013), 161--169."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-3101"},{"key":"e_1_3_2_1_48_1","volume-title":"Speech vs. text: A comparative analysis of features for depression detection systems. In 2016 IEEE spoken language technology workshop (SLT)","author":"Morales Michelle Renee","unstructured":"Michelle Renee Morales and Rivka Levitan. 2016. Speech vs. text: A comparative analysis of features for depression detection systems. In 2016 IEEE spoken language technology workshop (SLT). IEEE, 136--143."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2070481.2070509"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2016.2614299"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/IICETA50496.2020.9318860"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413486"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/0165-0327(85)90014-X"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1081"},{"key":"e_1_3_2_1_56_1","volume-title":"International Conference on Machine Learning. PMLR, 28492--28518","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, JongWook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In International Conference on Machine Learning. PMLR, 28492--28518."},{"key":"e_1_3_2_1_57_1","unstructured":"Alec Radford JeffreyWu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"crossref","unstructured":"Fabien Ringeval Bj\u00f6rn Schuller Michel Valstar Nicholas Cummins Roddy Cowie Leili Tavabi Maximilian Schmitt Sina Alisamir Shahin Amiriparian Eva-Maria Messner et al. 2019. AVEC 2019 workshop and challenge: state-of-mind detecting depression with AI and cross-cultural affect recognition. In Proceedings of the 9th International on Audio\/visual Emotion Challenge and Workshop. 3--12.","DOI":"10.1145\/3347320.3357688"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"crossref","unstructured":"Morteza Rohanian Julian Hough Matthew Purver et al. 2019. Detecting Depression with Word-Level Multimodal Fusion.. In Interspeech. 1443--1447.","DOI":"10.21437\/Interspeech.2019-2283"},{"key":"e_1_3_2_1_60_1","volume-title":"Yossi Adi, Jingyu Liu, Tal Remez, J\u00e9r\u00e9my Rapin, et al.","author":"Roziere Baptiste","year":"2023","unstructured":"Baptiste Roziere, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, J\u00e9r\u00e9my Rapin, et al. 2023. Code llama: Open foundation models for code. arXiv preprint arXiv:2308.12950 (2023)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1080\/02699930441000030"},{"key":"e_1_3_2_1_62_1","first-page":"24","article-title":"DepressNet: A Multimodal Hierarchical Attention Mechanism approach for Depression","volume":"15","author":"Saggu Guramritpal Singh","year":"2022","unstructured":"Guramritpal Singh Saggu, Keshav Gupta, KV Arya, and Ciro Rodriguez Rodriguez. 2022. DepressNet: A Multimodal Hierarchical Attention Mechanism approach for Depression Detection. Int. J. Eng. Sci. 15, 1 (2022), 24--32.","journal-title":"Detection. Int. J. Eng. Sci."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.116076"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"crossref","unstructured":"Aharon Satt Shai Rozenberg Ron Hoory et al. 2017. Efficient emotion recognition from speech using deep learning on spectrograms.. In Interspeech. 1089--1093.","DOI":"10.21437\/Interspeech.2017-200"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746569"},{"key":"e_1_3_2_1_66_1","volume-title":"The diagnosis of depression: current and emerging methods. Comprehensive psychiatry 54, 1","author":"Smith Katie M","year":"2013","unstructured":"Katie M Smith, Perry F Renshaw, and John Bilello. 2013. The diagnosis of depression: current and emerging methods. Comprehensive psychiatry 54, 1 (2013)."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2017.08.003"},{"key":"e_1_3_2_1_68_1","volume-title":"Comparison of self-report and structured clinical interview in the identification of depression. Comprehensive psychiatry 55, 4","author":"Stuart Amanda L","year":"2014","unstructured":"Amanda L Stuart, Julie A Pasco, Felice N Jacka, Sharon L Brennan, Michael Berk, and Lana J Williams. 2014. Comparison of self-report and structured clinical interview in the identification of depression. Comprehensive psychiatry 55, 4 (2014), 866--869."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2011-746"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133944.3133951"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1207\/S15374424JCCP3301_14"},{"key":"e_1_3_2_1_72_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_73_1","volume-title":"Proceedings of the conference. Association for Computational Linguistics. Meeting","volume":"2019","author":"Hubert Tsai Yao-Hung","year":"2019","unstructured":"Yao-Hung Hubert Tsai, Shaojie Bai, Paul Pu Liang, J Zico Kolter, Louis-Philippe Morency, and Ruslan Salakhutdinov. 2019. Multimodal transformer for unaligned multimodal language sequences. In Proceedings of the conference. Association for Computational Linguistics. Meeting, Vol. 2019. NIH Public Access, 6558."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988257.2988258"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/CogInfoCom.2012.6422035"},{"key":"e_1_3_2_1_76_1","volume-title":"Acoustic differences between healthy and depressed people: a cross-situation study. BMC psychiatry 19","author":"Wang Jingying","year":"2019","unstructured":"Jingying Wang, Lei Zhang, Tianli Liu, Wei Pan, Bin Hu, and Tingshao Zhu. 2019. Acoustic differences between healthy and depressed people: a cross-situation study. BMC psychiatry 19 (2019), 1--12."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054345"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1037\/h0036706"},{"key":"e_1_3_2_1_79_1","volume-title":"Articulation rate in psychotherapeutic dialogues for depression: patients and therapists. depression 5","author":"White Laurence","year":"2022","unstructured":"Laurence White and Hannah Grimes. 2022. Articulation rate in psychotherapeutic dialogues for depression: patients and therapists. depression 5 (2022), 23."},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988257.2988263"},{"key":"e_1_3_2_1_81_1","volume-title":"Affective conditioning on hierarchical networks applied to depression detection from transcribed clinical interviews. arXiv preprint arXiv:2006.08336","author":"Xezonaki Danai","year":"2020","unstructured":"Danai Xezonaki, Georgios Paraskevopoulos, Alexandros Potamianos, and Shrikanth Narayanan. 2020. Affective conditioning on hierarchical networks applied to depression detection from transcribed clinical interviews. arXiv preprint arXiv:2006.08336 (2020)."},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICESC51422.2021.9532751"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1002\/ima.22793"},{"key":"e_1_3_2_1_84_1","volume-title":"Mtag: Modal-temporal attention graph for unaligned human multimodal language sequences. arXiv preprint arXiv:2010.11985","author":"Yang Jianing","year":"2020","unstructured":"Jianing Yang, YongxinWang, Ruitao Yi, Yuying Zhu, Azaan Rehman, Amir Zadeh, Soujanya Poria, and Louis-Philippe Morency. 2020. Mtag: Modal-temporal attention graph for unaligned human multimodal language sequences. arXiv preprint arXiv:2010.11985 (2020)."},{"key":"e_1_3_2_1_85_1","volume-title":"Detecting depression severity from vocal prosody","author":"Yang Ying","year":"2012","unstructured":"Ying Yang, Catherine Fairbairn, and Jeffrey F Cohn. 2012. Detecting depression severity from vocal prosody. IEEE transactions on affective computing 4, 2 (2012)."},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jad.2021.08.090"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3347320.3357696"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i11.21483"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.1097\/00004583-198709000-00002"},{"key":"e_1_3_2_1_90_1","volume-title":"Tensor fusion network for multimodal sentiment analysis. arXiv preprint arXiv:1707.07250","author":"Zadeh Amir","year":"2017","unstructured":"Amir Zadeh, Minghai Chen, Soujanya Poria, Erik Cambria, and Louis-Philippe Morency. 2017. Tensor fusion network for multimodal sentiment analysis. arXiv preprint arXiv:1707.07250 (2017)."},{"key":"e_1_3_2_1_91_1","volume-title":"Proceedings of the 29th ACM International Conference on Multimedia. 135--143","author":"Zhang Pingyue","year":"2021","unstructured":"Pingyue Zhang, Mengyue Wu, Heinrich Dinkel, and Kai Yu. 2021. Depa: Selfsupervised audio embedding for depression detection. In Proceedings of the 29th ACM International Conference on Multimedia. 135--143."},{"key":"e_1_3_2_1_92_1","volume-title":"Hierarchical Convolutional Attention Network for Depression Detection on Social Media and Its Impact During Pandemic","author":"Zogan Hamad","year":"2023","unstructured":"Hamad Zogan, Imran Razzak, Shoaib Jameel, and Guandong Xu. 2023. Hierarchical Convolutional Attention Network for Depression Detection on Social Media and Its Impact During Pandemic. IEEE Journal of Biomedical and Health Informatics (2023)."}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679797","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679797","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:28Z","timestamp":1750294708000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679797"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":92,"alternative-id":["10.1145\/3627673.3679797","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679797","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}