{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T18:40:12Z","timestamp":1763059212531,"version":"3.37.3"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T00:00:00Z","timestamp":1725580800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T00:00:00Z","timestamp":1725580800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s00034-024-02836-6","type":"journal-article","created":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T18:02:33Z","timestamp":1725645753000},"page":"239-280","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Acoustic Scene Classification Using Various Features and DNN Model: A Monolithic and Hierarchical Approach"],"prefix":"10.1007","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3389-4330","authenticated-orcid":false,"given":"Chandrasekhar","family":"Paseddula","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Suryakanth V.","family":"Gangashetty","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,6]]},"reference":[{"key":"2836_CR1","doi-asserted-by":"crossref","unstructured":"K.N.R.K.R. Alluri, S. Achanta, S.R. Kadiri, S.V. Gangashetty, A.K. Vuppala, Detection of replay attacks using single frequency filtering cepstral coefficients, in Proceedings of Interspeech, (August, 2017)","DOI":"10.21437\/Interspeech.2017-256"},{"issue":"3","key":"2836_CR2","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1016\/j.dsp.2006.06.007","volume":"17","author":"LD Alsteris","year":"2007","unstructured":"L.D. Alsteris, K.K. Paliwal, Short-time phase spectrum in speech processing: a review and some experimental results. Digital Signal Process. 17(3), 578\u2013616 (2007)","journal-title":"Digital Signal Process."},{"key":"2836_CR3","doi-asserted-by":"crossref","unstructured":"H. Banno, J. Lu, S. Nakamura, K. Shikano, H.K. Wahara, Efficient representation of short-time phase based on group delay, in Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP , vol.\u00a02 (1998), pp. 861\u2013864","DOI":"10.1109\/ICASSP.1998.675401"},{"issue":"3","key":"2836_CR4","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1109\/MSP.2014.2326181","volume":"32","author":"D Barchiesi","year":"2015","unstructured":"D. Barchiesi, D. Giannoulis, D. Stowell, M.D. Plumbley, Acoustic scene classification: classifying environments from the sounds they produce. IEEE Signal Process. Mag. 32(3), 16\u201334 (2015)","journal-title":"IEEE Signal Process. Mag."},{"key":"2836_CR5","doi-asserted-by":"crossref","unstructured":"Y. Bayya, Formant extraction from linear prediction phase spectra. J. Acoustical Soc of Am. 63(5), 1638\u20131640 (May, 1978)","DOI":"10.1121\/1.381864"},{"key":"2836_CR6","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.specom.2018.01.003","volume":"97","author":"N Chennupati","year":"2018","unstructured":"N. Chennupati, S.R. Kadiri, Y. Bayya, Significance of phase in single frequency filtering outputs of speech signals. Speech Commun. 97, 66\u201372 (2018)","journal-title":"Speech Commun."},{"key":"2836_CR7","doi-asserted-by":"crossref","unstructured":"B. Clarkson, A. Pentland, Unsupervised clustering of ambulatory audio and video, in Proceedings of International Conference on Acoustics Speech and Signal Processing, Washington, DC, USA (1999), pp. 3037\u20133040","DOI":"10.1109\/ICASSP.1999.757481"},{"key":"2836_CR8","doi-asserted-by":"crossref","unstructured":"J.T. Geiger, B. Schuller, G. Rigoll, Recognising acoustic scenes with large-scale audio feature extraction and SVM, in in Proceedings of IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (2013)","DOI":"10.1109\/WASPAA.2013.6701857"},{"key":"2836_CR9","unstructured":"D. Giannoulis, D. Stowell, E. Benetos, M. Rossignol, M. Lagrange, M.D. Plumbley, A database and challenge for acoustic scene classification and event detection, in Proceedings of European Signal Processing Conference (September, 2013)"},{"key":"2836_CR10","unstructured":"M.C. Green, D. Murphy, Acoustic scene classification using spatial features, in Proceedings of Detection and Classification of Acoustic Scenes and Events Workshop (DCASE), November (2017), pp. 42\u201345"},{"key":"2836_CR11","doi-asserted-by":"publisher","first-page":"705","DOI":"10.1109\/TASLP.2015.2404035","volume":"23","author":"A Gunnam","year":"2015","unstructured":"A. Gunnam, Y. Bayya, Single frequency filtering approach for discriminating speech and nonspeech. IEEE\/ACM Trans. Audio Speech Lang. Process. 23, 705\u2013717 (2015)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2836_CR12","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1016\/j.specom.2016.10.002","volume":"85","author":"C Hanil\u00e7i","year":"2016","unstructured":"C. Hanil\u00e7i, T. Kinnunena, Md. Sahidullaha, A. Sizova, Spoofing detection goes noisy: an analysis of synthetic speech detection in the presence of additive noise. Speech Commun. 85, 83\u201397 (2016)","journal-title":"Speech Commun."},{"key":"2836_CR13","doi-asserted-by":"crossref","unstructured":"S.R. Kadiri, Y. Bayya, Analysis and detection of phonation modes in singing voice using excitation source features and single frequency filtering cepstral coefficients (SFFCC), in Proceedings of Interspeech (2018), pp. 441\u2013445","DOI":"10.21437\/Interspeech.2018-2502"},{"key":"2836_CR14","doi-asserted-by":"crossref","unstructured":"N. Kamarudin, S.A.R. Al-Haddad, A. Khmag, S.J. Hashim, A.R.B. Hassan, Sequential parameterizing affine projection (SPAP) windowing length for acoustic echo cancellation on speech accents identification, in Proceedings of Electric and Electronics, Computer Science, Biomedical Engineerings Meeting (EBBT) (Istanbul, Turkey, 2017)","DOI":"10.1109\/EBBT.2017.7956787"},{"key":"2836_CR15","first-page":"7301","volume":"11","author":"N Kamarudin","year":"2016","unstructured":"N. Kamarudin, S.A.R. Al-Haddad, A. Khmag, A.R.B. Hassan, S.J. Hashim, Analysis on Mel frequency cepstral coefficients and linear predictive cepstral coefficients as feature extraction on automatic accents identification. Int. J. Appl. Eng. Res. 11, 7301\u20137307 (2016)","journal-title":"Int. J. Appl. Eng. Res."},{"key":"2836_CR16","doi-asserted-by":"crossref","unstructured":"B.K. Khonglah, K.T. Deepak, S.R.M. Prasanna, Indoor\/outdoor audio classification using foreground speech segmentation, in Proceedings of Interspeech (2017)","DOI":"10.21437\/Interspeech.2017-309"},{"key":"2836_CR17","unstructured":"D.P. Kingma, J.L. Ba, Adam: a method for stochastic optimization, CoRR, vol. abs\/1412.6980 (2014)"},{"issue":"3","key":"2836_CR18","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1109\/34.667881","volume":"20","author":"J Kittler","year":"1998","unstructured":"J. Kittler, M. Hatef, R.P.W. Duin, J. Matas, On combining classifiers. IEEE Trans. Pattern Anal. Mach. Intell. 20(3), 226\u2013239 (1998)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2836_CR19","unstructured":"J.D. Krijnders, G.A.T. Holt, Tone-fit and MFCC scene classification compared to human recognition, in Proceedings of IEEE Workshop on Applications of Signal Processing to Audio and Acoustics, (May, 2013)"},{"key":"2836_CR20","unstructured":"J. M. K. Kua, T. Thiruvaran, M. Nosratighods, E. Ambikairajah, J. Epps, Investigation of spectral centroid magnitude and frequency for speaker recognition. Odyssey 2010, The Speaker and Language Recognition Workshop, Brno, Czech Republic, 28, 34\u201339, (July, 2010)"},{"issue":"2","key":"2836_CR21","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1109\/TASLP.2017.2778423","volume":"26","author":"A Mesaros","year":"2018","unstructured":"A. Mesaros, T. Heittola, E. Benetos, P. Foster, M. Lagrange, T. Virtanen, M.D. Plumbley, Detection and classification of acoustic scenes and events: outcome of the dcase 2016 challenge. IEEE\/ACM Trans. Audio Speech Lang. Process. 26(2), 379\u2013393 (2018)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2836_CR22","unstructured":"A. Mesaros, T. Heittola, A. Diment, B. Elizalde, A. Shah, E. Vincent, B. Raj, T. Virtanen, DCASE 2017 challenge setup: tasks, datasets and baseline system, in Proceedings of Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE) (November, 2017), pp. 85\u201392"},{"key":"2836_CR23","doi-asserted-by":"crossref","unstructured":"A. Mesaros, T. Heittola, T. Virtanen, TUT Database for acoustic scene classification and sound event detection, in Proceedings of European Signal Processing Conference (EUSIPCO), Budapest, Hungary (2016)","DOI":"10.1109\/EUSIPCO.2016.7760424"},{"key":"2836_CR24","unstructured":"S. Mun, S. Park, D.K. Han, H. Ko, Generative adversarial network based acoustic scene training set augmentation and selection using SVM hyper-plane, in Proceedings of Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), September (2017)"},{"key":"2836_CR25","unstructured":"M. Niessen, C. Cance, D. Dubois, Categories for soundscape-Toward a hybrid classification, in Proceedings of Internoise (2010), pp. 5816\u20135829, 01"},{"key":"2836_CR26","unstructured":"W. Nogueira, Sound scene identification based on monaural and binaural features, in Proceedings of IEEE AASP Challenge on Detection and Classification of Acoustic Scenes and Events (2016)"},{"key":"2836_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2020.107568","volume":"172","author":"C Paseddula","year":"2021","unstructured":"C. Paseddula, S.V. Gangashetty, Late fusion framework for acoustic scene classification using LPCC, SCMC, and log-Mel band energies with deep neural networks. J. Appl. Acoust. 172, 107568 (2021)","journal-title":"J. Appl. Acoust."},{"key":"2836_CR28","doi-asserted-by":"crossref","unstructured":"C. Paseddula, S.V. Gangashetty, Acoustic scene classification using single frequency filtering cepstral coefficients and DNN, in Proceedings of International Joint Conference on Neural Networks (IJCNN), pp. 1-6 (2020)","DOI":"10.1109\/IJCNN48605.2020.9207257"},{"key":"2836_CR29","doi-asserted-by":"crossref","unstructured":"C. Paseddula, S.V. Gangashetty, DNN based acoustic scene classification using score fusion of MFCC and Inverse MFCC, in Proceedings of IEEE 13th International Conference on Industrial and Information Systems (ICIIS) (2018), pp. 18\u201321","DOI":"10.1109\/ICIINFS.2018.8721379"},{"key":"2836_CR30","unstructured":"K. Patil, M. Elhilali, Multiresolution auditory representations for scene classification, in Proceedings of IEEE AASP Challenge on Detection and Classification of Acoustic Scenes and Events (2013)"},{"key":"2836_CR31","doi-asserted-by":"crossref","unstructured":"L. Pham, I. McLoughlin, H. Phan, R. Palaniappan, A. Mertins, Deep feature embedding and hierarchical classification for audio scene classification, in Proceedings of International Joint Conference on Neural Networks (IJCNN) (2020), pp. 1\u20137","DOI":"10.1109\/IJCNN48605.2020.9206866"},{"key":"2836_CR32","doi-asserted-by":"crossref","unstructured":"J. Salamon, C. Jacoby, J.P. Bello, A dataset and taxonomy for urban sound research, in Proceedings of ACM International Conference on Multimedia (2014), pp. 1041\u20131044","DOI":"10.1145\/2647868.2655045"},{"key":"2836_CR33","unstructured":"N. Sawhney, P. Maes, Situational Awareness from Environmental Sounds (Computer Science Engineering, Environmental Science, 1997)"},{"key":"2836_CR34","doi-asserted-by":"crossref","unstructured":"E. Scheirer, M. Slaney, Construction and evaluation of a robust multifeature speech\/music discriminator, in Proceedings of IEEE International Conference on Acoustics, Speech, and Signal Processing, vol.\u00a02 (1997), pp. 1331\u20131334","DOI":"10.1109\/ICASSP.1997.596192"},{"key":"2836_CR35","doi-asserted-by":"crossref","unstructured":"D. Sharma, I. Ali, A modified MFCC feature extraction technique for robust speaker recognition, in Proceedings of International Conference on Advances in Computing, Communications and Informatics (ICACCI) (Aug, 2015), pp. 1052\u20131057","DOI":"10.1109\/ICACCI.2015.7275749"},{"issue":"10","key":"2836_CR36","doi-asserted-by":"publisher","first-page":"1733","DOI":"10.1109\/TMM.2015.2428998","volume":"17","author":"D Stowell","year":"2015","unstructured":"D. Stowell, D. Giannoulis, E. Benetos, M. Lagrange, M.D. Plumbley, Detection and classification of acoustic scenes and events. IEEE Trans. Multimedia 17(10), 1733\u20131746 (2015)","journal-title":"IEEE Trans. Multimedia"},{"key":"2836_CR37","doi-asserted-by":"crossref","unstructured":"S. Waldekar, G. Saha, Two-level fusion-based acoustic scene classification. Appl. Acoust. 170 , 1\u201311, 107502 , (June, 2020)","DOI":"10.1016\/j.apacoust.2020.107502"},{"key":"2836_CR38","unstructured":"Y. Xu, Q. Huang, W. Wang, M.D. Plumbley, Hierarchical learning for DNN-based acoustic scene classification, in Proceedings of Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE) (July, 2016)"},{"key":"2836_CR39","doi-asserted-by":"crossref","unstructured":"Y. Yin, R.R. Shah, R. Zimmermann, Learning and fusing multimodal deep features for acoustic scene categorization, in Proceedings of ACM International Conference on Multimedia, vol. 10 (2018), pp. 1892\u20131900","DOI":"10.1145\/3240508.3240631"},{"key":"2836_CR40","doi-asserted-by":"crossref","unstructured":"L. Zhang, J. Han, S. Deng, Unsupervised temporal feature learning based on sparse coding embedded BoAW for acoustic event recognition, in Proceedings of Interspeech (2018), pp. 3284\u20133288","DOI":"10.21437\/Interspeech.2018-1243"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-024-02836-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-024-02836-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-024-02836-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,20]],"date-time":"2025-01-20T13:32:50Z","timestamp":1737379970000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-024-02836-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,6]]},"references-count":40,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["2836"],"URL":"https:\/\/doi.org\/10.1007\/s00034-024-02836-6","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"type":"print","value":"0278-081X"},{"type":"electronic","value":"1531-5878"}],"subject":[],"published":{"date-parts":[[2024,9,6]]},"assertion":[{"value":"1 September 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 August 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 September 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}