{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T11:03:10Z","timestamp":1770462190771,"version":"3.49.0"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2021,7,5]],"date-time":"2021-07-05T00:00:00Z","timestamp":1625443200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,7,5]],"date-time":"2021-07-05T00:00:00Z","timestamp":1625443200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100016714","name":"University of Sharjah","doi-asserted-by":"crossref","award":["19020403139"],"award-info":[{"award-number":["19020403139"]}],"id":[{"id":"10.13039\/100016714","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1007\/s10772-021-09869-1","type":"journal-article","created":{"date-parts":[[2021,7,5]],"date-time":"2021-07-05T18:02:45Z","timestamp":1625508165000},"page":"1055-1066","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Speaker identification in stressful talking environments based on convolutional neural network"],"prefix":"10.1007","volume":"24","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7856-9342","authenticated-orcid":false,"given":"Ismail","family":"Shahin","sequence":"first","affiliation":[]},{"given":"Ali Bou","family":"Nassif","sequence":"additional","affiliation":[]},{"given":"Noor","family":"Hindawi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,7,5]]},"reference":[{"key":"9869_CR1","doi-asserted-by":"crossref","unstructured":"Abdel-hamid, O., Mohamed, A., Jiang, H., Deng, L., Penn, G., & Yu, D. (2014). Convolutional neural networks for speech recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 22(10), 1533\u20131545.","DOI":"10.1109\/TASLP.2014.2339736"},{"key":"9869_CR2","first-page":"1","volume":"2015","author":"K Ahmad","year":"2015","unstructured":"Ahmad, K., Thosar, A., Nirmal, J., & Pande, V. (2015). A unique approach in text independent speaker recognition using MFCC feature sets and probabilistic neural network. Eighth International Conference on Advances in Pattern Recognition (ICAPR), 2015, 1\u20136.","journal-title":"Eighth International Conference on Advances in Pattern Recognition (ICAPR)"},{"issue":"1","key":"9869_CR3","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/S0167-7012(00)00201-3","volume":"43","author":"IA Basheer","year":"2000","unstructured":"Basheer, I. A., & Hajmeer, M. (2000). Artificial neural networks: Fundamentals, computing, design, and application. Journal of Microbiol Methods, 43(1), 3\u201331.","journal-title":"Journal of Microbiol Methods"},{"key":"9869_CR4","first-page":"281","volume":"13","author":"J Bergstra","year":"2012","unstructured":"Bergstra, J., & Bengio, Y. (2012). Random search for hyper-parameter optimization. Journal of Machine Learning Research, 13, 281\u2013305.","journal-title":"Journal of Machine Learning Research"},{"key":"9869_CR5","doi-asserted-by":"publisher","unstructured":"Bhattacharya, G., Kenny, P., Alam, J., Stafylakis, T., & Kenny, P. (2016). Deep neural network based text-dependent speaker verification: preliminary results. Odyssey. https:\/\/doi.org\/10.21437\/Odyssey.2016-2","DOI":"10.21437\/Odyssey.2016-2"},{"issue":"4","key":"9869_CR6","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1109\/89.848224","volume":"8","author":"SE Bou-Ghazale","year":"2000","unstructured":"Bou-Ghazale, S. E., & Hansen, J. H. L. (2000). A Comparative study of traditional and newly proposed features for recognition of speech under stress. IEEE Transaction Speech Audio Process., 8(4), 429\u2013442. https:\/\/doi.org\/10.1109\/89.848224","journal-title":"IEEE Transaction Speech Audio Process."},{"issue":"2","key":"9869_CR7","doi-asserted-by":"publisher","first-page":"143","DOI":"10.18178\/ijmlc.2019.9.2.778","volume":"9","author":"S Bunrit","year":"2019","unstructured":"Bunrit, S., Inkian, T., Kerdprasop, N., & Kerdprasop, K. (2019). Text-independent speaker identification using deep learning model of convolution neural network. International Journal of Machine Learning and Computing, 9(2), 143\u2013148. https:\/\/doi.org\/10.18178\/ijmlc.2019.9.2.778","journal-title":"International Journal of Machine Learning and Computing"},{"issue":"1","key":"9869_CR8","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1109\/89.260362","volume":"2","author":"KR Farrell","year":"1994","unstructured":"Farrell, K. R., Mammone, R. J., & Assaleh, K. T. (1994). Speaker recognition using neural networks and conventional classifiers. IEEE Transaction Speech Audio Process., 2(1), 194\u2013205. https:\/\/doi.org\/10.1109\/89.260362","journal-title":"IEEE Transaction Speech Audio Process."},{"issue":"1","key":"9869_CR9","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1109\/TASSP.1986.1164788","volume":"34","author":"S Furui","year":"1986","unstructured":"Furui, S. (1986). Speaker-independent isolated word recognition using dynamic features of speech spectrum. IEEE Transactions on Acoustic, 34(1), 52\u201359. https:\/\/doi.org\/10.1109\/TASSP.1986.1164788","journal-title":"IEEE Transactions on Acoustic"},{"issue":"5\u20136","key":"9869_CR10","doi-asserted-by":"publisher","first-page":"505","DOI":"10.1016\/0167-6393(91)90054-W","volume":"10","author":"S Furui","year":"1991","unstructured":"Furui, S. (1991). Speaker-dependent-feature extraction, recognition and processing techniques. Speech Communication, 10(5\u20136), 505\u2013520. https:\/\/doi.org\/10.1016\/0167-6393(91)90054-W","journal-title":"Speech Communication"},{"issue":"10","key":"9869_CR11","doi-asserted-by":"publisher","first-page":"1943","DOI":"10.1109\/TBME.2006.871883","volume":"53","author":"J Godino-llorente","year":"2006","unstructured":"Godino-llorente, J., G\u00f3mez-vilda, P., & Blanco-velasco, M. (2006). Dimensionality reduction of a pathological voice quality assessment system based on Gaussian mixture models and short-term cepstral parameters. IEEE Transactions on Biomedical Engineering, 53(10), 1943\u20131953.","journal-title":"IEEE Transactions on Biomedical Engineering"},{"key":"9869_CR12","doi-asserted-by":"crossref","unstructured":"Goutte, C., & Gaussier, E. (2005) A probabilistic interpretation of precision, recall and F-score, with implication for evaluation. In Advances in Information Retrieval, pp. 345\u2013359.","DOI":"10.1007\/978-3-540-31865-1_25"},{"key":"9869_CR13","unstructured":"Hansen, J. (1999). \u201cSUSAS Transcripts LDC99T33\u201d, Web Download. Linguistic Data Consortium."},{"key":"9869_CR14","doi-asserted-by":"crossref","unstructured":"Hansen , J., & Bou-Ghazale, S. (1997). Getting started with SUSAS\u202f: A speech under simulated and actual stress database. In Fifth European conference on speech communication and technology (pp. 2\u20135).","DOI":"10.21437\/Eurospeech.1997-494"},{"key":"9869_CR15","doi-asserted-by":"crossref","unstructured":"Hanson, B., & Applebaum, T. (1990) Robust speaker-independent word recognition using static, dynamic and acceleration features: Experiments with Lombard and noisy speech. In International conference on acoustics, speech, and signal processing, pp. 857\u2013860.","DOI":"10.1109\/ICASSP.1990.115973"},{"key":"9869_CR16","unstructured":"Hasan, R., Jamil, M., Rabbani, G., & Rahman, S. (2004). Speaker identification using MEL frequency cepstral coefficients. Variations, 1(4)"},{"key":"9869_CR17","unstructured":"Hogg, R., McKean, J., & Craig, A. (2005). Introduction to mathematical statistics"},{"key":"9869_CR18","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1109\/CAS47993.2019.9075461","volume":"2019","author":"AM Jalil","year":"2019","unstructured":"Jalil, A. M., Hasan, F. S., & Alabbasi, H. A. (2019). Speaker identification using convolutional neural network for clean and noisy speech samples. In First international conference of computer and applied sciences (CAS) (pp. 57\u201362). https:\/\/doi.org\/10.1109\/CAS47993.2019.9075461","journal-title":"First International Conference of Computer and Applied Sciences (CAS)"},{"key":"9869_CR19","doi-asserted-by":"crossref","unstructured":"Livingstone, S., & Russo, F. (2018). The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English. PLoS ONE, 13(5)","DOI":"10.1371\/journal.pone.0196391"},{"key":"9869_CR20","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2016.7738816","author":"Y Lukic","year":"2016","unstructured":"Lukic, Y., Vogt, C., Durr, O., & Stadelmann, T. (2016). Speaker identification and clustering using convolutional neural networks. IEEE International Workshop on Machine Learning for Signal Processing. https:\/\/doi.org\/10.1109\/MLSP.2016.7738816","journal-title":"IEEE International Workshop on Machine Learning for Signal Processing"},{"issue":"8","key":"9869_CR21","doi-asserted-by":"publisher","first-page":"2203","DOI":"10.1109\/TMM.2014.2360798","volume":"16","author":"Q Mao","year":"2014","unstructured":"Mao, Q., Dong, M., Huang, Z., & Zhan, Y. (2014). Learning salient features for speech emotion recognition using convolutional neural networks. IEEE Transaction Multimedia, 16(8), 2203\u20132213. https:\/\/doi.org\/10.1109\/TMM.2014.2360798","journal-title":"IEEE Transaction Multimedia"},{"key":"9869_CR22","doi-asserted-by":"publisher","first-page":"19143","DOI":"10.1109\/ACCESS.2019.2896880","volume":"7","author":"AB Nassif","year":"2019","unstructured":"Nassif, A. B., Shahin, I., Attili, I., Azzeh, M., & Shaalan, K. (2019). Speech recognition using deep neural networks: A systematic review. IEEE Access, 7, 19143\u201319165. https:\/\/doi.org\/10.1109\/ACCESS.2019.2896880","journal-title":"IEEE Access"},{"key":"9869_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.107141","volume":"103","author":"AB Nassif","year":"2021","unstructured":"Nassif, A. B., Shahin, I., Hamsa, S., Nemmour, N., & Hirose, K. (2021). CASA-based speaker identification using cascaded GMM-CNN classifier in noisy and emotional talking conditions. Applied Soft Computing, 103, 107141. https:\/\/doi.org\/10.1016\/j.asoc.2021.107141","journal-title":"Applied Soft Computing"},{"key":"9869_CR25","unstructured":"Quatieri, T. F. (2002). Discrete-time speech signal processing: principles and practice. 2002."},{"issue":"3","key":"9869_CR26","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/s10772-010-9075-z","volume":"13","author":"GS Raja","year":"2010","unstructured":"Raja, G. S., & Dandapat, S. (2010). Speaker recognition under stressed condition. International Journal of Speech Technology, 13(3), 141\u2013161. https:\/\/doi.org\/10.1007\/s10772-010-9075-z","journal-title":"International Journal of Speech Technology"},{"key":"9869_CR27","doi-asserted-by":"publisher","first-page":"4072","DOI":"10.1109\/ICASSP.2002.5745552","volume":"4","author":"DA Reynolds","year":"2002","unstructured":"Reynolds, D. A. (2002). An overview of automatic speaker recognition technology. In IEEE international conference on acoustics, speech and signal processing (vol. 4, pp. 4072\u20134075). https:\/\/doi.org\/10.1109\/ICASSP.2002.5745552","journal-title":"IEEE International Conference on Acoustics, Speech and Signal Processing"},{"issue":"4","key":"9869_CR28","doi-asserted-by":"publisher","first-page":"1047","DOI":"10.1016\/j.specom.2006.01.005","volume":"48","author":"I Shahin","year":"2006","unstructured":"Shahin, I. (2006). Enhancing speaker identification performance under the shouted talking condition using second-order circular hidden Markov models. Speech Communication, 48(4), 1047\u20131055.","journal-title":"Speech Communication"},{"issue":"11","key":"9869_CR29","doi-asserted-by":"publisher","first-page":"2700","DOI":"10.1016\/j.sigpro.2008.05.012","volume":"88","author":"I Shahin","year":"2008","unstructured":"Shahin, I. (2008). Speaker identification in the shouted environment using Suprasegmental Hidden Markov Models. Signal Processing, 88(11), 2700\u20132708. https:\/\/doi.org\/10.1016\/j.sigpro.2008.05.012","journal-title":"Signal Processing"},{"issue":"1","key":"9869_CR30","doi-asserted-by":"publisher","first-page":"862138","DOI":"10.1155\/2010\/862138","volume":"2010","author":"I Shahin","year":"2010","unstructured":"Shahin, I. (2010). Employing second-order circular suprasegmental hidden markov models to enhance speaker identification performance in shouted talking environments. EURASIP Journal on Audio, Speech, and Music Processing. https:\/\/doi.org\/10.1155\/2010\/862138","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"issue":"1\u20132","key":"9869_CR31","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/s12193-011-0082-4","volume":"6","author":"I Shahin","year":"2012","unstructured":"Shahin, I. (2012). Studying and enhancing talking condition recognition in stressful and emotional talking environments based on HMMs, CHMM2s and SPHMMs. Journal of Multimodal User Interfaces, 6(1\u20132), 59\u201371. https:\/\/doi.org\/10.1007\/s12193-011-0082-4","journal-title":"Journal of Multimodal User Interfaces"},{"issue":"3","key":"9869_CR41","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1007\/s10772-013-9188-2","volume":"16","author":"I Shahin","year":"2013","unstructured":"Shahin, I. (2013). Employing both gender and emotion cues to enhance speaker identification performance in emotional talking environments. International Journal of Speech Technology, 16(3), 341\u2013351. https:\/\/doi.org\/10.1007\/s10772-013-9188-2","journal-title":"International Journal of Speech Technology"},{"issue":"10","key":"9869_CR32","doi-asserted-by":"publisher","first-page":"3770","DOI":"10.1007\/s00034-015-0220-4","volume":"35","author":"I Shahin","year":"2016","unstructured":"Shahin, I. (2016a). \u201cSpeaker identification in a shouted talking environment based on novel third-order circular suprasegmental hidden markov models. Circuits, Systems, and Signal Processing, 35(10), 3770\u20133792. https:\/\/doi.org\/10.1007\/s00034-015-0220-4","journal-title":"Circuits, Systems, and Signal Processing"},{"issue":"1","key":"9869_CR33","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1515\/jisys-2014-0118","volume":"25","author":"I Shahin","year":"2016","unstructured":"Shahin, I. (2016b). Employing emotion cues to verify speakers in emotional talking environments. Journal of Intelligent Systems, 25(1), 3\u201317. https:\/\/doi.org\/10.1515\/jisys-2014-0118","journal-title":"Journal of Intelligent Systems"},{"key":"9869_CR34","doi-asserted-by":"publisher","unstructured":"Shahin, I. (2016c). Emirati speaker verification based on HMMls, HMM2s, and HMM3s. In IEEE 13th international conference on signal processing (ICSP) (pp. 562\u2013567). https:\/\/doi.org\/10.1109\/ICSP.2016.7877896.","DOI":"10.1109\/ICSP.2016.7877896"},{"issue":"10","key":"9869_CR35","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1016\/j.engappai.2014.07.006","volume":"35","author":"I Shahin","year":"2018","unstructured":"Shahin, I. (2018a). Novel third-order hidden Markov models for speaker identification in shouted talking environments. Engineering Applications of Artificial Intelligence, 35(10), 316\u2013323. https:\/\/doi.org\/10.1016\/j.engappai.2014.07.006","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"9869_CR36","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/CTIT.2018.8649514","volume":"2018","author":"I Shahin","year":"2018","unstructured":"Shahin, I. (2018b). Text-independent emirati-accented speaker identification in emotional talking environment. In Fifth HCT information technology trends (ITT) (pp. 257\u2013262). https:\/\/doi.org\/10.1109\/CTIT.2018.8649514","journal-title":"Fifth HCT Information Technology Trends (ITT)"},{"key":"9869_CR37","doi-asserted-by":"publisher","unstructured":"Shahin, I., & Ba-Hutair, M. N. (2014). Emarati speaker identification. In 12th international conference on signal processing (ICSP) (pp. 488\u2013493). https:\/\/doi.org\/10.1109\/ICOSP.2014.7015053.","DOI":"10.1109\/ICOSP.2014.7015053"},{"key":"9869_CR38","doi-asserted-by":"publisher","unstructured":"Shahin, I., & Nassif, A. B. (2019). Emirati-accented speaker identification in stressful talking conditions. In International conference on electrical and computing technologies and applications (ICECTA), Nov. 2019 (pp. 1\u20136). https:\/\/doi.org\/10.1109\/ICECTA48151.2019.8959731.","DOI":"10.1109\/ICECTA48151.2019.8959731"},{"issue":"2","key":"9869_CR39","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1007\/s10772-018-9502-0","volume":"21","author":"I Shahin","year":"2018","unstructured":"Shahin, I., Nassif, A. B., & Bahutair, M. (2018). Emirati-accented speaker identification in each of neutral and shouted talking environments. International Journal of Speech Technology, 21(2), 265\u2013278. https:\/\/doi.org\/10.1007\/s10772-018-9502-0","journal-title":"International Journal of Speech Technology"},{"key":"9869_CR40","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-018-3760-2","author":"I Shahin","year":"2018","unstructured":"Shahin, I., Nassif, A. B., & Hamsa, S. (2018). Novel cascaded Gaussian mixture model-deep neural network classifier for speaker identification in emotional talking environments. Neural Computing and Applications. https:\/\/doi.org\/10.1007\/s00521-018-3760-2","journal-title":"Neural Computing and Applications"},{"key":"9869_CR42","unstructured":"Wikipedia, F. (2014). Softmax function. http:\/\/en.wikipedia.org\/w\/index.php?title=Softmax_function&oldid=623230338."}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09869-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-021-09869-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09869-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,2]],"date-time":"2023-01-02T22:25:16Z","timestamp":1672698316000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-021-09869-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,5]]},"references-count":41,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2021,12]]}},"alternative-id":["9869"],"URL":"https:\/\/doi.org\/10.1007\/s10772-021-09869-1","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,7,5]]},"assertion":[{"value":"31 December 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 June 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 July 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}