{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T05:46:47Z","timestamp":1751521607084},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,5,16]],"date-time":"2022-05-16T00:00:00Z","timestamp":1652659200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,5,16]],"date-time":"2022-05-16T00:00:00Z","timestamp":1652659200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2022,12]]},"DOI":"10.1007\/s10772-022-09974-9","type":"journal-article","created":{"date-parts":[[2022,5,16]],"date-time":"2022-05-16T16:03:56Z","timestamp":1652717036000},"page":"907-920","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Analysis of influencing features with spectral feature extraction and multi-class classification using deep neural network for speech recognition system"],"prefix":"10.1007","volume":"25","author":[{"given":"Dinesh Kumar","family":"Anguraj","sequence":"first","affiliation":[]},{"given":"J.","family":"Anitha","sequence":"additional","affiliation":[]},{"given":"S. John Justin","family":"Thangaraj","sequence":"additional","affiliation":[]},{"given":"L.","family":"Ramesh","sequence":"additional","affiliation":[]},{"given":"Seetha","family":"Rama Krishna","sequence":"additional","affiliation":[]},{"given":"D.","family":"Mythrayee","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,5,16]]},"reference":[{"key":"9974_CR1","doi-asserted-by":"publisher","first-page":"4299","DOI":"10.1109\/ACCESS.2016.2591442","volume":"4","author":"J Deng","year":"2016","unstructured":"Deng, J., Xu, X., Zhang, Z., Fr\u00fchholz, S., & Schuller, B. (2016). Exploitation of phase-based features for whispered speech emotion recognition. IEEE Access, 4, 4299\u20134309.","journal-title":"IEEE Access"},{"issue":"1","key":"9974_CR2","doi-asserted-by":"publisher","first-page":"27","DOI":"10.21608\/ejle.2020.22022.1002","volume":"7","author":"EE El-Maghraby","year":"2020","unstructured":"El-Maghraby, E. E., Gody, A., & Farouk, M. (2020). Noise-robust speech recognition system based on multimodal audio-visual approach using different deep learning classification techniques. Egyptian Journal of English Language, 7(1), 27\u201342.","journal-title":"Egyptian Journal of English Language"},{"key":"9974_CR3","doi-asserted-by":"crossref","unstructured":"Fan, Y., Jiang, Q.-Y., Yu, Y.-Q., & Li, W.-J. (2019). Deep hashing for speaker identification and retrieval. In Proceedings of Interspeech Graz, pp. 2908\u20132912.","DOI":"10.21437\/Interspeech.2019-2457"},{"issue":"3","key":"9974_CR4","doi-asserted-by":"publisher","first-page":"703","DOI":"10.2298\/CSIS170112024H","volume":"14","author":"N He","year":"2017","unstructured":"He, N., & Zhao, H. (2017). A retrieval algorithm of encrypted speech based on syllable-level perceptual hashing\u2019\u2019. Computer Science and Information Systems, 14(3), 703\u2013718.","journal-title":"Computer Science and Information Systems"},{"key":"9974_CR5","unstructured":"Ioffe, S., & Szegedy, C. (2015). Batch normalisation: Accelerating deep network training by reducing internal covariate shift. In The Proceedings of Machine Learning Research., pp. 1\u20139."},{"key":"9974_CR6","doi-asserted-by":"crossref","unstructured":"Khonglah, B.K. & Prasanna, S. R. M. (2016). Low frequency region of vocal tract information for speech\/music classification. In Proceedings of IEEE Region 10 Conference, pp. 2593\u20132597.","DOI":"10.1109\/TENCON.2016.7848506"},{"key":"9974_CR7","doi-asserted-by":"crossref","unstructured":"Kim, B. & Pardo, B.A. (2019). Improving content-based audio retrieval by vocal imitation feedback.' In Proceedings of the IEEE International Conference on Acoustics Speech Signal Process. (ICASSP), Brighton, pp. 4100\u20134104.","DOI":"10.1109\/ICASSP.2019.8683461"},{"key":"9974_CR8","unstructured":"Kruspe, A., Zapf, D., & Lukashevich, H. (2017) Automatic speech\/music discrimination for broadcast signals. In Proceeding of INFORMATIK Gesellschaft f\u00fcr Informatik, Bonn, pp. 151\u2013162."},{"issue":"17","key":"9974_CR9","doi-asserted-by":"publisher","first-page":"24431","DOI":"10.1007\/s11042-018-7072-4","volume":"78","author":"LW Li","year":"2019","unstructured":"Li, L. W., Fu, T., & Hu, W. (2019). Piecewise supervised deep hashing for image retrieval. Multimedia Tools and Applications, 78(17), 24431\u201324451.","journal-title":"Multimedia Tools and Applications"},{"issue":"21","key":"9974_CR10","doi-asserted-by":"publisher","first-page":"30585","DOI":"10.1007\/s11042-018-6414-6","volume":"78","author":"Y Li","year":"2019","unstructured":"Li, Y., Kong, X., & Fu, H. (2019). Exploring geometric information in CNN for image retrieval\u2019\u2019. Multimedia Tools and Applications, 78(21), 30585\u201330598.","journal-title":"Multimedia Tools and Applications"},{"issue":"6","key":"9974_CR11","doi-asserted-by":"publisher","first-page":"913","DOI":"10.1007\/s12652-016-0406-z","volume":"8","author":"Y Li","year":"2017","unstructured":"Li, Y., Tao, J., Chao, L., Bao, W., & Liu, Y. (2017). CHEAVD: A Chinese natural emotional audio_visual database. Journal of Ambient Intelligence and Humanized Computing, 8(6), 913\u2013924.","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"key":"9974_CR12","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1016\/j.neucom.2018.05.005","volume":"309","author":"ZT Liu","year":"2018","unstructured":"Liu, Z. T., Xie, Q., Wu, M., Cao, W.-H., Mei, Y., & Mao, J.-W. (2018). Speech emotion recognition based on an improved brain emotion learning model. Neurocomputing, 309, 145\u2013156.","journal-title":"Neurocomputing"},{"issue":"8","key":"9974_CR13","doi-asserted-by":"publisher","first-page":"2203","DOI":"10.1109\/TMM.2014.2360798","volume":"16","author":"Q Mao","year":"2014","unstructured":"Mao, Q., Dong, M., Huang, Z., & Zhan, Y. (2014). Learning salient features for speech emotion recognition using convolutional neural networks. IEEE Transactions on Multimedia, 16(8), 2203\u20132213.","journal-title":"IEEE Transactions on Multimedia"},{"key":"9974_CR14","first-page":"67","volume":"2","author":"S Masoumeh","year":"2014","unstructured":"Masoumeh, S., & Mohammad, M. B. (2014). A review on speech-music discrimination methods. International Journal of Computer Science and Network Solutions, 2, 67\u201378.","journal-title":"International Journal of Computer Science and Network Solutions"},{"key":"9974_CR15","doi-asserted-by":"crossref","unstructured":"Mirsamadi, S, E. Barsoum, & C. Zhang, (2017). Automatic speech emotion recognition using recurrent neural networks with local attention. In Proceedings of the IEEE International Conference on Acoustics, (ICASSP), pp. 2227\u20132231.","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"9974_CR16","doi-asserted-by":"publisher","first-page":"334","DOI":"10.1016\/j.eswa.2018.05.016","volume":"114","author":"M Papakostas","year":"2018","unstructured":"Papakostas, M., & Giannakopoulos, T. (2018). Speech-music discrimination using deep visual feature extractors. Expert Systems with Applications, 114, 334\u2013344.","journal-title":"Expert Systems with Applications"},{"key":"9974_CR17","doi-asserted-by":"crossref","unstructured":"Rahmani, A.M. & F. Razzazi. (2019). An LSTM auto-encoder for single-channel speaker attention system. In Proceeding of the 9th International Conference Computer Knowledge. Engineering (ICCKE), Mashhad, pp. 110\u2013115.","DOI":"10.1109\/ICCKE48569.2019.8965084"},{"key":"9974_CR18","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1016\/j.specom.2016.07.010","volume":"83","author":"P Song","year":"2016","unstructured":"Song, P., Zheng, W., Ou, S., Zhang, X., Jin, Y., Liu, J., & Yu, Y. (2016). Crosscorpus speech emotion recognition based on transfer non-negative matrix factorisation. Speech Communication, 83, 34\u201341.","journal-title":"Speech Communication"},{"key":"9974_CR19","doi-asserted-by":"crossref","unstructured":"Srinivas, D., Roy, D., & Mohan, C.K. (2014). Learning sparse dictionaries for music and speech classification. In Proceeding of the 19th International Conference on Signal Processing, pp. 673\u2013675.","DOI":"10.1109\/ICDSP.2014.6900749"},{"key":"9974_CR20","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/j.specom.2019.10.004","volume":"115","author":"W Sun","year":"2019","unstructured":"Sun, W., Zou, B., Fu, S., Chen, J., & Wang, F. (2019). Speech emotion recognition based on DNN-decision tree SVM model. Speech Communication, 115, 29\u201337.","journal-title":"Speech Communication"},{"issue":"1","key":"9974_CR21","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1007\/s10772-018-9491-z","volume":"21","author":"M Swain","year":"2018","unstructured":"Swain, M., Routray, A., & Kabisatpathy, P. (2018). Databases, features and classi_ers for speech emotion recognition: A review. International Journal of Speech Technology, 21(1), 93\u2013120.","journal-title":"International Journal of Speech Technology"},{"key":"9974_CR22","doi-asserted-by":"crossref","unstructured":"Wang, Z.Q. & Tashev, I. (2017) Learning utterance-level representations for speech emotion and age\/gender recognition using deep neural networks. In Proceedings of the IEEE International Conference on Acoustics, (ICASSP), pp. 5150\u20135154.","DOI":"10.1109\/ICASSP.2017.7953138"},{"key":"9974_CR23","doi-asserted-by":"crossref","unstructured":"Wen, G., Li, H., Huang, J., Li, D. & Xun, E (2017). Random deep belief networks for recognising emotions from speech signals. Computational Intelligence and Neuroscience","DOI":"10.1155\/2017\/1945630"},{"key":"9974_CR24","doi-asserted-by":"crossref","unstructured":"Xu, Y., Kong, Q., Wang, W. & Plumbley, M.D. (2018). Large-scale weakly supervised audio classification using gated convolutional neural network. In Proceedings of the IEEE International Conference on Acoustics, Speech Signal Process. (ICASSP), Calgary, pp. 121\u2013125.","DOI":"10.1109\/ICASSP.2018.8461975"},{"issue":"4","key":"9974_CR25","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1109\/TASLP.2019.2894554","volume":"27","author":"J Yu","year":"2019","unstructured":"Yu, J., Markov, K., & Matsui, T. (2019). Articulatory and spectrum information fusion based on deep recurrent neural networks. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 27(4), 742\u2013752.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"4","key":"9974_CR26","doi-asserted-by":"publisher","first-page":"2614","DOI":"10.1121\/1.4964509","volume":"140","author":"Z Zhang","year":"2016","unstructured":"Zhang, Z. (2016). Mechanics of human voice production and control\u201d. Journal of the Acoustical Society of America, 140(4), 2614\u20132635.","journal-title":"Journal of the Acoustical Society of America"},{"key":"9974_CR27","doi-asserted-by":"publisher","first-page":"148556","DOI":"10.1109\/ACCESS.2020.3015876","volume":"8","author":"H Zhang","year":"2020","unstructured":"Zhang, H., Li, Y., Hu, Y., & Zhao, X. (2020). An encrypted speech retrieval method based on deep perceptual hashing and CNN-BiLSTM. IEEE Access, 8, 148556\u2013148569.","journal-title":"IEEE Access"},{"issue":"2","key":"9974_CR28","doi-asserted-by":"publisher","first-page":"522","DOI":"10.1109\/TBME.2014.2359372","volume":"62","author":"Z Zhang","year":"2015","unstructured":"Zhang, Z., Pi, Z., & Liu, B. (2015). Troika: A general framework for heart rate monitoring using wrist-type photoplethysmographic signals during intensive physical exercise. IEEE Transactions on Biomedical Engineering, 62(2), 522\u2013531.","journal-title":"IEEE Transactions on Biomedical Engineering"},{"key":"9974_CR29","doi-asserted-by":"crossref","unstructured":"Zhang, N. & Lin, J. (2018). An efficient content based music retrieval algorithm. In Proceedings of the 5th International Conference on Vehicle Technology and Intelligent Transport Big Data Smart City (ICITBS), Xiamen, pp. 617\u2013620.","DOI":"10.1109\/ICITBS.2018.00161"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-022-09974-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-022-09974-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-022-09974-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T11:16:10Z","timestamp":1670843770000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-022-09974-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,16]]},"references-count":29,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,12]]}},"alternative-id":["9974"],"URL":"https:\/\/doi.org\/10.1007\/s10772-022-09974-9","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,5,16]]},"assertion":[{"value":"3 March 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 April 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 May 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}