{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T16:24:52Z","timestamp":1779380692379,"version":"3.53.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"25","license":[{"start":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T00:00:00Z","timestamp":1705881600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T00:00:00Z","timestamp":1705881600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-18076-w","type":"journal-article","created":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T07:03:31Z","timestamp":1705907011000},"page":"66135-66173","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":23,"title":["Depression detection using cascaded attention based deep learning framework using speech data"],"prefix":"10.1007","volume":"83","author":[{"given":"Sachi","family":"Gupta","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gaurav","family":"Agarwal","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shivani","family":"Agarwal","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dilkeshwar","family":"Pandey","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,1,22]]},"reference":[{"issue":"23","key":"18076_CR1","doi-asserted-by":"publisher","first-page":"8701","DOI":"10.3390\/app10238701","volume":"10","author":"L Lin","year":"2020","unstructured":"Lin L, Chen X, Shen Y, Zhang L (2020) towards automatic depression detection: a bilstm\/1d cnn-based mosdel. Appl Sci 10(23):8701","journal-title":"Appl Sci"},{"issue":"3","key":"18076_CR2","doi-asserted-by":"publisher","first-page":"e12773","DOI":"10.1111\/exsy.12773","volume":"39","author":"HW Loh","year":"2022","unstructured":"Loh HW, Ooi CP, Aydemir E, Tuncer T, Dogan S, Acharya UR (2022) Decision support system for major depression detection using spectrogram and convolution neural network with EEG signals. Expert Syst 39(3):e12773","journal-title":"Expert Syst"},{"key":"18076_CR3","doi-asserted-by":"crossref","unstructured":"Casado C\u00c1, Ca\u00f1ellas ML, L\u00f3pez MB (2022) Depression recognition using remote photoplethysmography from facial videos.\u00a0IEEE Trans Affect Comput.\u00a0arXiv preprint arXiv:2206.04399","DOI":"10.1109\/TAFFC.2023.3238641"},{"issue":"6","key":"18076_CR4","doi-asserted-by":"publisher","first-page":"3569","DOI":"10.3390\/su14063569","volume":"14","author":"J Park","year":"2022","unstructured":"Park J, Moon N (2022) Design and implementation of attention depression detection model based on multi-modal analysis. Sustainability 14(6):3569","journal-title":"Sustainability"},{"key":"18076_CR5","unstructured":"Campbell EL, Dineley J, Conde P, Matcham F, Lamers F, Siddi S, Docio-Fernandez L, Garcia-Mateo C, Cummins N (2022) Detecting the severity of major depressive disorder from speech: a novel HARD-training methodology. arXiv preprint arXiv:2206.01542"},{"key":"18076_CR6","doi-asserted-by":"crossref","unstructured":"Punithavathi R, Sharmila M, Avudaiappan T, Raj I, Kanchana S, Alemayehu Mamo SA (2022) Empirical investigation for predicting depression from different machine learning based voice recognition techniques. Evid Based Complement Alternat Med\u00a0","DOI":"10.1155\/2022\/6395860"},{"key":"18076_CR7","doi-asserted-by":"crossref","unstructured":"Dumpala SH, Uher R, Matwin S, Kiefte M, Oore S (2022) Sine-wave speech and privacy-preserving depression detection. In: Proc. SMM21, Workshop on Speech, Music and Mind, 2021:11\u201315","DOI":"10.21437\/SMM.2021-3"},{"key":"18076_CR8","doi-asserted-by":"crossref","unstructured":"Xu L, Hou J, Gao J (2021) A novel smart depression recognition method using human-computer interaction system. Wirel Communic Mob Comput. 1\u20138","DOI":"10.1155\/2021\/5565967"},{"key":"18076_CR9","doi-asserted-by":"crossref","unstructured":"Rajawat AS, Rawat R, Barhanpurkar K, Shaw RN, Ghosh A (2021) Depression detection for elderly people using AI robotic systems leveraging the Nelder\u2013Mead Method. In: Artificial Intelligence for Future Generation\u00a0Robotics, Elsevier, pp 55\u201370","DOI":"10.1016\/B978-0-323-85498-6.00006-X"},{"key":"18076_CR10","doi-asserted-by":"publisher","first-page":"609760","DOI":"10.3389\/fnins.2021.609760","volume":"15","author":"W Guo","year":"2021","unstructured":"Guo W, Yang H, Liu Z, Xu Y, Hu B (2021) deep neural networks for depression recognition based on 2d and 3d facial expressions under emotional stimulus tasks. Front Neurosci 15:609760","journal-title":"Front Neurosci"},{"issue":"12","key":"18076_CR11","doi-asserted-by":"publisher","first-page":"1746","DOI":"10.52586\/5066","volume":"26","author":"X Lu","year":"2021","unstructured":"Lu X, Shi D, Liu Y, Yuan J (2021) Speech depression recognition based on attentional residual network. Front Biosci Landmark 26(12):1746\u20131759","journal-title":"Front Biosci Landmark"},{"key":"18076_CR12","doi-asserted-by":"crossref","unstructured":"Villatoro-Tello E, Dubagunta SP, Fritsch J, Ram\u00edrez-de-la-Rosa G, Motlicek P, Magimai-Doss M (2021) Late fusion of the available lexicon and raw waveform-based acoustic modeling for depression and dementia recognition.\u00a0In: Interspeech, pp 1927\u20131931","DOI":"10.21437\/Interspeech.2021-1288"},{"key":"18076_CR13","doi-asserted-by":"crossref","unstructured":"Yang J, Lu H, Li C, Hu X, Hu B (2022) Data augmentation for depression detection using skeleton-based gait information. Med Biol Eng Comput 60(9):2665\u20132679","DOI":"10.1007\/s11517-022-02595-z"},{"key":"18076_CR14","doi-asserted-by":"crossref","unstructured":"Wang H, Liu Y, Zhen X, Tu X (2021) Depression speech recognition with a three-dimensional convolutional network. Front Hum Neurosci\u00a015:713823","DOI":"10.3389\/fnhum.2021.713823"},{"issue":"6","key":"18076_CR15","doi-asserted-by":"publisher","first-page":"1339","DOI":"10.1007\/s11517-021-02358-2","volume":"59","author":"M Tadalagi","year":"2021","unstructured":"Tadalagi M, Joshi AM (2021) AutoDep: automatic depression detection using facial expressions based on linear binary pattern descriptor. Med Biol Eng Compu 59(6):1339\u20131354","journal-title":"Med Biol Eng Compu"},{"issue":"1","key":"18076_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-022-00622-2","volume":"9","author":"J Angskun","year":"2022","unstructured":"Angskun J, Tipprasert S, Angskun T (2022) big data analytics on social networks for real-time depression detection. J Big Data 9(1):1\u201315","journal-title":"J Big Data"},{"key":"18076_CR17","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1016\/j.neucom.2020.10.015","volume":"422","author":"L He","year":"2021","unstructured":"He L, Chan JCW, Wang Z (2021) Automatic depression recognition using CNN with attention mechanism from videos. Neurocomputing 422:165\u2013175","journal-title":"Neurocomputing"},{"key":"18076_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2021.3053999","volume":"70","author":"A Seal","year":"2021","unstructured":"Seal A, Bajpai R, Agnihotri J, Yazidi A, Herrera-Viedma E, Krejcar O (2021) DeprNet: a deep convolution neural network framework for detecting depression using EEG. IEEE Trans Instrum Meas 70:1\u201313","journal-title":"IEEE Trans Instrum Meas"},{"issue":"7","key":"18076_CR19","doi-asserted-by":"publisher","first-page":"9961","DOI":"10.1007\/s11042-020-10118-x","volume":"80","author":"G Agarwal","year":"2021","unstructured":"Agarwal G, Om H (2021) Performance of deer hunting optimization based deep learning algorithm for speech emotion recognition. Multimedia Tool Appl 80(7):9961\u20139992","journal-title":"Multimedia Tool Appl"},{"issue":"2","key":"18076_CR20","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1049\/sil2.12015","volume":"15","author":"G Agarwal","year":"2021","unstructured":"Agarwal G, Om H (2021) an efficient supervised framework for music mood recognition using autoencoder-based optimized support vector regression model. IET Signal Proc 15(2):98\u2013121","journal-title":"IET Signal Proc"},{"issue":"2","key":"18076_CR21","doi-asserted-by":"publisher","first-page":"377","DOI":"10.7763\/IJCTE.2013.V5.713","volume":"5","author":"S Gupta","year":"2013","unstructured":"Gupta S, Agarwal G, Kumar V (2013) an efficient and robust genetic algorithm for multiprocessor task scheduling. Int J Comput Theory Eng 5(2):377","journal-title":"Int J Comput Theory Eng"},{"issue":"5","key":"18076_CR22","doi-asserted-by":"publisher","first-page":"676","DOI":"10.3390\/electronics11050676","volume":"11","author":"A Amanat","year":"2022","unstructured":"Amanat A, Rizwan M, Javed AR, Abdelhaq M, Alsaqour R, Pandya S, Uddin M (2022) Deep learning for depression detection from textual data. Electronics 11(5):676","journal-title":"Electronics"},{"key":"18076_CR23","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/j.neucom.2021.02.019","volume":"441","author":"Y Dong","year":"2021","unstructured":"Dong Y, Yang X (2021) A hierarchical depression detection model based on vocal and emotional cues. Neurocomputing 441:279\u2013290","journal-title":"Neurocomputing"},{"key":"18076_CR24","doi-asserted-by":"crossref","unstructured":"Cai C, Niu M, Liu B, Tao J, Liu X (2021) TDCA-Net: time-domain channel attention network for depression detection.\u00a0In: Interspeech, pp 2511\u201325155","DOI":"10.21437\/Interspeech.2021-1176"},{"key":"18076_CR25","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1016\/j.procs.2020.04.003","volume":"171","author":"NS Srimadhur","year":"2020","unstructured":"Srimadhur NS, Lalitha S (2020) an end-to-end model for detection and assessment of depression levels using speech. Procedia Comput Sci 171:12\u201321","journal-title":"Procedia Comput Sci"},{"issue":"6","key":"18076_CR26","doi-asserted-by":"publisher","first-page":"688","DOI":"10.3390\/e22060688","volume":"22","author":"A V\u00e1zquez-Romero","year":"2020","unstructured":"V\u00e1zquez-Romero A, Antol\u00edn AG (2020) Automatic detection of depression in speech using ensemble convolutional neural networks. Entropy 22(6):688","journal-title":"Entropy"},{"key":"18076_CR27","doi-asserted-by":"publisher","first-page":"103107","DOI":"10.1016\/j.bspc.2021.103107","volume":"71","author":"E Rejaibi","year":"2022","unstructured":"Rejaibi E, Komaty A, Meriaudeau F, Agrebi S, Othmani A (2022) MFCC-based recurrent neural network for automatic clinical depression recognition and assessment from speech. Biomed Signal Process Control 71:103107","journal-title":"Biomed Signal Process Control"},{"issue":"1","key":"18076_CR28","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1109\/TCSS.2022.3154442","volume":"10","author":"L Ansari","year":"2022","unstructured":"Ansari L, Ji S, Chen Q, Cambria E (2022) Ensemble hybrid learning methods for automated depression detection. IEEE Trans Comput Soc Syst 10(1):211\u2013219","journal-title":"IEEE Trans Comput Soc Syst"},{"key":"18076_CR29","doi-asserted-by":"crossref","unstructured":"Shen Y, Yang H, Lin L (2022) Automatic depression detection: An emotional audio-textual corpus and a GRU\/BiLSTM-based model. In: ICASSP 2022\u20132022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 6247\u20136251","DOI":"10.1109\/ICASSP43922.2022.9746569"},{"key":"18076_CR30","first-page":"100005","volume":"2","author":"M Muzammel","year":"2020","unstructured":"Muzammel M, Salam H, Hoffmann Y, Chetouani M, Othmani A (2020) AudVowelConsNet: a phoneme-level based deep CNN architecture for clinical depression diagnosis. Mach Learn Appl 2:100005","journal-title":"Mach Learn Appl"},{"issue":"11","key":"18076_CR31","doi-asserted-by":"publisher","first-page":"2019","DOI":"10.1587\/transinf.2020EDL8132","volume":"104","author":"Y Zhao","year":"2021","unstructured":"Zhao Y, Xie Y, Liang R, Zhang L, Zhao L, Liu C (2021) Detecting depression from speech through an attentive LSTM network. IEICE Trans Inf Syst 104(11):2019\u20132023","journal-title":"IEICE Trans Inf Syst"},{"key":"18076_CR32","doi-asserted-by":"crossref","unstructured":"Saidi A, Othman SB, Saoud SB (2020) Hybrid CNN-SVM classifier for efficient depression detection system. In: 2020 4th International Conference on Advanced Systems and Emergent Technologies (IC_ASET), IEEE, 229\u2013234","DOI":"10.1109\/IC_ASET49463.2020.9318302"},{"key":"18076_CR33","doi-asserted-by":"crossref","unstructured":"Walsh D, Dev S, Nag A (2023) Hilbert-Huang-Transform Based Features for Accent Classification of Non-Native English Speakers. In: 2023 34th Irish Signals and Systems Conference (ISSC), IEEE 1\u20136","DOI":"10.1109\/ISSC59246.2023.10162075"},{"issue":"2","key":"18076_CR34","first-page":"16","volume":"10","author":"DS Darling","year":"2022","unstructured":"Darling DS, Hinduja J (2022) Feature extraction in speech recognition using linear predictive coding: an overview. i-Manager\u2019s J Digit Signal Process 10(2):16","journal-title":"i-Manager's J Digit Signal Process"},{"key":"18076_CR35","unstructured":"Dutta D, Choudhury RD, Gogoi S (n.d.) Speech databases, features extraction techniques and classifiers with special reference to automatic speech emotion recognition"},{"key":"18076_CR36","doi-asserted-by":"crossref","unstructured":"Seneviratne N, Espy-Wilson C (2021) Speech based depression severity level classification using a multi-stage dilated cnn-lstm model. arXiv preprint arXiv:2104.04195","DOI":"10.21437\/Interspeech.2021-1967"},{"key":"18076_CR37","doi-asserted-by":"publisher","first-page":"44883","DOI":"10.1109\/ACCESS.2019.2909180","volume":"7","author":"MM Tadesse","year":"2019","unstructured":"Tadesse MM, Lin H, Xu B, Yang L (2019) Detection of depression-related posts in reddit social media forum. IEEE Access 7:44883\u201344893","journal-title":"IEEE Access"},{"issue":"6","key":"18076_CR38","doi-asserted-by":"publisher","first-page":"1385","DOI":"10.3390\/s17061385","volume":"17","author":"S-C Liao","year":"2017","unstructured":"Liao S-C, Wu C-T, Huang H-C, Cheng W-T, Liu Y-H (2017) Major depression detection from EEG signals using kernel eigen-filter-bank common spatial patterns. Sensors 17(6):1385","journal-title":"Sensors"},{"key":"18076_CR39","doi-asserted-by":"crossref","unstructured":"Yalamanchili B, Kota NS, Abbaraju MS, Nadella VSS, Alluri SV (2020) Real-time acoustic based depression detection using machine learning techniques. In: 2020 International conference on emerging trends in information technology and engineering (ic-ETITE), IEEE, 1\u20136","DOI":"10.1109\/ic-ETITE47903.2020.394"},{"key":"18076_CR40","doi-asserted-by":"publisher","first-page":"104561","DOI":"10.1016\/j.bspc.2022.104561","volume":"82","author":"M Fang","year":"2023","unstructured":"Fang M, Peng S, Liang Y, Hung C-C, Liu S (2023) A multi-modal fusion model with multi-level attention mechanism for depression detection. Biomed Signal Process Control 82:104561","journal-title":"Biomed Signal Process Control"},{"issue":"2","key":"18076_CR41","doi-asserted-by":"publisher","first-page":"328","DOI":"10.3390\/electronics12020328","volume":"12","author":"F Yin","year":"2023","unstructured":"Yin F, Du J, Xu X, Zhao L (2023) Depression detection in speech using transformer and parallel convolutional neural networks. Electronics 12(2):328","journal-title":"Electronics"},{"key":"18076_CR42","doi-asserted-by":"crossref","unstructured":"Huang Z, Epps J, Joachim D (2020) Exploiting vocal tract coordination using dilated cnns for depression detection in naturalistic environments. In: ICASSP 2020\u20132020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 6549\u20136553","DOI":"10.1109\/ICASSP40776.2020.9054323"},{"key":"18076_CR43","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1016\/j.jad.2022.11.060","volume":"323","author":"M Du","year":"2023","unstructured":"Du M, Liu S, Wang T, Zhang W, Ke Y, Chen L, Ming D (2023) Depression recognition using a proposed speech chain model fusing speech production and perception features. J Affect Disord 323:299\u2013308","journal-title":"J Affect Disord"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-18076-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-18076-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-18076-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,9]],"date-time":"2024-07-09T10:23:44Z","timestamp":1720520624000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-18076-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,22]]},"references-count":43,"journal-issue":{"issue":"25","published-online":{"date-parts":[[2024,7]]}},"alternative-id":["18076"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-18076-w","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1,22]]},"assertion":[{"value":"20 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 October 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 December 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 January 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"All the authors involved have agreed to participate in this submitted article.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"All the authors involved in this manuscript give full consent for publication of this submitted article.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to publish"}},{"value":"Authors declare that they have no conflict of interest.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}