{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T03:26:05Z","timestamp":1777519565566,"version":"3.51.4"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,10,24]],"date-time":"2022-10-24T00:00:00Z","timestamp":1666569600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,10,24]],"date-time":"2022-10-24T00:00:00Z","timestamp":1666569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s11063-022-11041-y","type":"journal-article","created":{"date-parts":[[2022,10,24]],"date-time":"2022-10-24T10:02:40Z","timestamp":1666605760000},"page":"4291-4306","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Attention Based Convolutional Neural Network with Multi-frequency Resolution Feature for Environment Sound Classification"],"prefix":"10.1007","volume":"55","author":[{"given":"Minze","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wu","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,10,24]]},"reference":[{"key":"11041_CR1","unstructured":"Gerstoft P, Hu Y, Patil C et al (2021) Audio scene monitoring using redundant un-localized microphone arrays. arXiv preprint arXiv:2103.01830"},{"key":"11041_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2020.107568","volume":"172","author":"C Paseddula","year":"2021","unstructured":"Paseddula C, Gangashetty SV (2021) Late fusion framework for acoustic scene classification using LPCC, SCMC, and log-mel band energies with deep neural networks. Appl Acoust 172:107568","journal-title":"Appl Acoust"},{"key":"11041_CR3","doi-asserted-by":"crossref","unstructured":"Castiajo P, Pinheiro AP (2021) Acoustic salience in emotional voice perception and its relationship with hallucination proneness. Cogn Affect Behav Neurosci : 1\u201314.","DOI":"10.3758\/s13415-021-00864-2"},{"issue":"5","key":"11041_CR4","first-page":"1037","volume":"38","author":"W Dai","year":"2017","unstructured":"Dai W, Dai C, Qu S et al (2017) Very deep convolutional neural networks for raw waveforms. IEEE Trans Pattern Anal Mach Intell 38(5):1037\u20131049","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"12","key":"11041_CR5","doi-asserted-by":"publisher","first-page":"2346","DOI":"10.1121\/1.1919362","volume":"36","author":"RN Shepard","year":"1964","unstructured":"Shepard RN (1964) Circularity in judgments of relative pitch. J Acoust Soc Am 36(12):2346\u20132361","journal-title":"J Acoust Soc Am"},{"key":"11041_CR6","doi-asserted-by":"crossref","unstructured":"Sangeetha J, Hariprasad R, Subhiksha S (2021) Analysis of machine learning algorithms for audio event classification using Mel-frequency cepstral coefficients. In: Applied Speech Processing. Academic Press, pp 175\u2013189","DOI":"10.1016\/B978-0-12-823898-1.00009-6"},{"key":"11041_CR7","doi-asserted-by":"crossref","unstructured":"Kumaran U, Rammohan S R, Nagarajan S M et al. (2021) Fusion of mel and gammatone frequency cepstral coefficients for speech emotion recognition using deep C-RNN. Int J Speech Technol, 1\u201312.","DOI":"10.1007\/s10772-020-09792-x"},{"key":"11041_CR8","doi-asserted-by":"crossref","unstructured":"Piczak KJ (2015) Environmental sound classification with convolutional neural networks. In: 2015 IEEE 25th international workshop on machine learning for signal processing (MLSP). IEEE, pp 1\u20136","DOI":"10.1109\/MLSP.2015.7324337"},{"issue":"1","key":"11041_CR9","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.apacoust.2018.12.006","volume":"148","author":"RV Sharan","year":"2019","unstructured":"Sharan RV, Moir TJ (2019) Acoustic event recognition using cochleagram image and convolutional neural networks. Appl Acoust 148(1):62\u201366","journal-title":"Appl Acoust"},{"key":"11041_CR10","doi-asserted-by":"crossref","unstructured":"Singh P, Saha G, Sahidullah M (2021) Non-linear frequency warping using constant-Q transformation for speech emotion recognition. In: 2021 international conference on computer communication and informatics (ICCCI). IEEE, pp 1\u20136","DOI":"10.1109\/ICCCI50826.2021.9402569"},{"key":"11041_CR11","doi-asserted-by":"crossref","unstructured":"Shu HY, Song Y, Zhou H (2018) Time-frequency performance study on urban sound classification with convolutional neural network. In: 2018 IEEE region 10 conference. Jeju, Korea (South), IEEE, pp 1713\u20131717","DOI":"10.1109\/TENCON.2018.8650428"},{"key":"11041_CR12","doi-asserted-by":"crossref","unstructured":"Tan LN, Yonggwan W (2016) Frequency features selection using decision tree for classification of sleep breathing sound. In: Information science and applications (ICISA), vol 376(9). Springer Singapore, pp 1375\u20131380","DOI":"10.1007\/978-981-10-0557-2_130"},{"key":"11041_CR13","unstructured":"Grobelnik M (1999) Feature selection for unbalanced class distribution and Naive Bayes. In: Proceedings of the sixteenth international conference on machine learning (ICML), Bled, Slovenia, pp 258\u2013267"},{"key":"11041_CR14","unstructured":"Christoph HA, Julian B, Steffen L et al (2017) Nonnegative matrix factorization and random forest for classification of heart sound recordings in the spectral domain. In: Computing in cardiology conference. IEEE"},{"key":"11041_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2021.108040","volume":"179","author":"MA Kobat","year":"2021","unstructured":"Kobat MA, Dogan S (2021) Novel three kernelled binary pattern feature extractor based automated PCG sound classification method. Appl Acoust 179:108040","journal-title":"Appl Acoust"},{"key":"11041_CR16","unstructured":"Aytar Y, Vondrick C, Torralba A (2019) SoundNet: learning sound representations from unlabeled video. pp 1\u20139. arXiv:1610.09001v1 [cs.CV]"},{"issue":"8","key":"11041_CR17","doi-asserted-by":"publisher","first-page":"2048","DOI":"10.1016\/j.procs.2017.08.250","volume":"112","author":"V Boddapati","year":"2017","unstructured":"Boddapati V, Petef A, Rasmusson J et al (2017) Classifying environmental sounds using image recognition networks. Procedia Comput Sci 112(8):2048\u20132056","journal-title":"Procedia Comput Sci"},{"key":"11041_CR18","doi-asserted-by":"crossref","unstructured":"Zhang XH, Zou YX, Wang WW (2018) LD-CNN: a lightweight dilated convolutional neural network for environmental sound classification. In: 2018 24th international conference on pattern recognition (ICPR). IEEE, pp 373\u2013378","DOI":"10.1109\/ICPR.2018.8545679"},{"issue":"2","key":"11041_CR19","doi-asserted-by":"publisher","first-page":"130327","DOI":"10.1109\/ACCESS.2019.2939495","volume":"7","author":"ZC Zhang","year":"2019","unstructured":"Zhang ZC, Xu SG, Zhang SQ et al (2019) Learning attentive representations for environmental sound classification. IEEE Access 7(2):130327\u2013130339","journal-title":"IEEE Access"},{"key":"11041_CR20","doi-asserted-by":"crossref","unstructured":"Sang J, Park S, Lee J (2018) Convolutional recurrent neural networks for urban sound classification using raw waveforms. In: 2018 26th European signal processing conference (EUSIPCO). IEEE, pp 2444\u20132448","DOI":"10.23919\/EUSIPCO.2018.8553247"},{"key":"11041_CR21","unstructured":"Jain R (2019) Improving performance and inference on audio classification tasks using capsule networks. pp 1\u20135. arXiv:1902.05069v1 [cs.SD]"},{"key":"11041_CR22","doi-asserted-by":"crossref","unstructured":"Piczak KJ (2015) ESC: dataset for environmental sound classification. In: 23rd ACM international conference on multimedia. ACM, pp 1015\u20131018.","DOI":"10.1145\/2733373.2806390"},{"key":"11041_CR23","doi-asserted-by":"crossref","unstructured":"Salamon J, Jacoby C, Bello JP (2014) A dataset and taxonomy for urban sound research. In: 22rd ACM international conference on multimedia, ACM, pp 1041\u20131044","DOI":"10.1145\/2647868.2655045"},{"key":"11041_CR24","doi-asserted-by":"crossref","unstructured":"Tokozume Y, Harada T (2017) Learning environmental sounds with end-to-end convolutional neural network. In: 2017 ieee international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 2721\u20132725","DOI":"10.1109\/ICASSP.2017.7952651"},{"key":"11041_CR25","unstructured":"Tokozume Y, Ushiku Y, Harada T (2018) Learning from between-class examples for deep sound recognition. pp 1\u20139. arXiv:1711.10282v2 [cs.LG]"},{"key":"11041_CR26","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1016\/j.eswa.2019.06.040","volume":"136","author":"S Abdoli","year":"2019","unstructured":"Abdoli S, Cardinal P, Koerich AL (2019) End-to-end environmental sound classification using a 1D convolutional neural network. Expert Syst Appl 136:252\u2013263","journal-title":"Expert Syst Appl"},{"issue":"5","key":"11041_CR27","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1007\/978-3-030-05716-9_13","volume":"11298","author":"D Chong","year":"2019","unstructured":"Chong D, Zou Y, Wang W (2019) Multi-channel convolutional neural networks with multi-level feature fusion for environmental sound classification. MultiMed Model 11298(5):157\u2013168","journal-title":"MultiMed Model"},{"key":"11041_CR28","doi-asserted-by":"publisher","first-page":"240","DOI":"10.3934\/publichealth.2021019","volume":"8","author":"KK Lella","year":"2021","unstructured":"Lella KK, Pja A (2021) Automatic COVID-19 disease diagnosis using 1D convolutional neural network and augmentation with human respiratory sound based on parameters: cough, breath, and voice. AIMS Public Health 8:240. https:\/\/doi.org\/10.3934\/publichealth.2021019","journal-title":"AIMS Public Health"},{"issue":"2","key":"11041_CR29","doi-asserted-by":"publisher","first-page":"240","DOI":"10.3934\/publichealth.2021019","volume":"8","author":"KK Lella","year":"2021","unstructured":"Lella KK, Pja A (2021) Automatic COVID-19 disease diagnosis using 1D convolutional neural network and augmentation with human respiratory sound based on parameters: cough, breath, and voice. AIMS Public Health 8(2):240\u2013264. https:\/\/doi.org\/10.3934\/publichealth.2021019 (PMID:34017889;PMCID:PMC8116184)","journal-title":"AIMS Public Health"},{"issue":"2","key":"11041_CR30","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky A, Sutskever I, Hinton G (2012) ImageNet classification with deep convolutional neural networks. Adv Neural Inf Process Syst 25(2):1097\u20131105","journal-title":"Adv Neural Inf Process Syst"},{"key":"11041_CR31","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y et al. (2015) Going deeper with convolutions. In: 2015 IEEE Conference on computer vision and pattern recognition (CVPR). IEEE, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"issue":"12","key":"11041_CR32","doi-asserted-by":"publisher","first-page":"15801","DOI":"10.1007\/s11042-018-6991-4","volume":"78","author":"G Tang","year":"2019","unstructured":"Tang G, Liang R, Xie Y et al (2019) Improved convolutional neural networks for acoustic event classification. Multimed Tools Appl 78(12):15801\u201315816","journal-title":"Multimed Tools Appl"},{"issue":"5","key":"11041_CR33","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1016\/j.apacoust.2018.12.019","volume":"148","author":"Y Chen","year":"2019","unstructured":"Chen Y, Guo Q, Liang X et al (2019) Environmental sound classification with dilated convolutions. Appl Acoust 148(5):123\u2013132","journal-title":"Appl Acoust"},{"key":"11041_CR34","doi-asserted-by":"crossref","unstructured":"Zhu B, Wang C, Liu F, et al. (2018) Learning environmental sounds with multi-scale convolutional neural network. In: 2018 international joint conference on neural networks (IJCNN). IEEE, pp 1\u20138.","DOI":"10.1109\/IJCNN.2018.8489641"},{"issue":"7","key":"11041_CR35","doi-asserted-by":"publisher","first-page":"1152","DOI":"10.3390\/app8071152","volume":"8","author":"S Li","year":"2018","unstructured":"Li S, Yao Y, Hu J et al (2018) An ensemble stacked convolutional neural network model for environmental event sound recognition. Appl Sci 8(7):1152\u20131171","journal-title":"Appl Sci"},{"issue":"2","key":"11041_CR36","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1016\/j.ijar.2007.03.004","volume":"48","author":"AP Dempster","year":"2008","unstructured":"Dempster AP (2008) The Dempster\u2013Shafer calculus for statisticians. Int J Approx Reason 48(2):365\u2013377","journal-title":"Int J Approx Reason"},{"issue":"7","key":"11041_CR37","doi-asserted-by":"publisher","first-page":"1733","DOI":"10.3390\/s19071733","volume":"19","author":"Y Su","year":"2019","unstructured":"Su Y, Zhang K, Wang JY et al (2019) Environment sound classification using a two-stream CNN based on decision-level fusion. Sensors 19(7):1733\u20131746","journal-title":"Sensors"},{"issue":"3","key":"11041_CR38","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1109\/LSP.2017.2657381","volume":"24","author":"J Salamon","year":"2017","unstructured":"Salamon J, Bello JP (2017) Deep convolutional neural networks and data augmentation for environmental sound classification. IEEE Signal Process Lett 24(3):279\u2013283","journal-title":"IEEE Signal Process Lett"},{"key":"11041_CR39","doi-asserted-by":"crossref","unstructured":"Davis N, Kumaraswamy S (2018) Environmental sound classification using deep convolutional neural networks and data augmentation In: 2018 IEEE Recent Advances in Intelligent Computational Systems (RAICS). IEEE, pp 41\u201345","DOI":"10.1109\/RAICS.2018.8635051"},{"key":"11041_CR40","doi-asserted-by":"crossref","unstructured":"Zhang Z, Xu S, Cao S et al. (2018) Deep convolutional neural network with mixup for environmental sound classification. In: Chinese conference on pattern recognition and computer vision (PRCV). Springer, pp 356\u2013367","DOI":"10.1007\/978-3-030-03335-4_31"},{"key":"11041_CR41","doi-asserted-by":"crossref","unstructured":"Tadanobu I, Phongtharin V, Shiqiang W et al. (2019) Shuffling and mixing data augmentation for environmental sound classification. In: Proceedings of the detection and classification of acoustic scenes and events 2019 workshop (DCASE). New York University, pp 109\u2013103.","DOI":"10.33682\/wgyb-bt40"},{"key":"11041_CR42","doi-asserted-by":"crossref","unstructured":"Li X, Chebiyyam V, Kirchhoff K (2019) Multi-stream network with temporal attention for environmental sound classification. pp 1\u20135. arXiv:1901.08608v1 [cs.SD]","DOI":"10.21437\/Interspeech.2019-3019"},{"key":"11041_CR43","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, et al. (2018) MobileNetV2: inverted residuals and linear bottlenecks, pp 1\u201314. arXiv:1801.04381v4 [cs.CV]","DOI":"10.1109\/CVPR.2018.00474"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11041-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-022-11041-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11041-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T13:42:36Z","timestamp":1744206156000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-022-11041-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,24]]},"references-count":43,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["11041"],"URL":"https:\/\/doi.org\/10.1007\/s11063-022-11041-y","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"value":"1370-4621","type":"print"},{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,24]]},"assertion":[{"value":"15 September 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 October 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}