{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T11:04:48Z","timestamp":1773486288619,"version":"3.50.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,27]],"date-time":"2025-12-27T00:00:00Z","timestamp":1766793600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,27]],"date-time":"2025-12-27T00:00:00Z","timestamp":1766793600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Pattern Anal Applic"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s10044-025-01593-9","type":"journal-article","created":{"date-parts":[[2025,12,27]],"date-time":"2025-12-27T00:27:06Z","timestamp":1766795226000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Rectangular kernels for information-dense domains in environmental sound classification"],"prefix":"10.1007","volume":"29","author":[{"given":"Zhenghao","family":"Chang","sequence":"first","affiliation":[]},{"given":"Ruhan","family":"He","sequence":"additional","affiliation":[]},{"given":"Yongsheng","family":"Yu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,27]]},"reference":[{"key":"1593_CR1","doi-asserted-by":"crossref","unstructured":"Wang L, Zhu H, Zhang X (2020) Transfer learning for music classification and regression tasks using artist tags. In: Proceedings of the 7th conference on sound and music technology, pp. 81\u201389. 
Springer, Singapore","DOI":"10.1007\/978-981-15-2756-2_7"},{"key":"1593_CR2","doi-asserted-by":"crossref","unstructured":"Bian W, Wang J, Zhuang B (2019) Audio-based music classification with DenseNet and data augmentation. The 16th Pacific Rim international conference on artificial intelligence. Springer, Fiji, pp 56\u201365","DOI":"10.1007\/978-3-030-29894-4_5"},{"key":"1593_CR3","unstructured":"Vacher M, SerignatJ C (2007) Sound classification in a smart room environment: an approach using GMM and HMM methods. The 4th IEEE conference on speech technology and human-computer dialogue. Publishing House of the Romanian Academy, Constanta, pp 135\u2013146"},{"key":"1593_CR4","doi-asserted-by":"crossref","unstructured":"Li H, Ishikawa S, Zhao Q (2007) Robot navigation and sound based position identification. The 2007 IEEE international conference on systems, man and cybernetics. IEEE, Montreal, pp 2449\u20132454","DOI":"10.1109\/ICSMC.2007.4413757"},{"issue":"5","key":"1593_CR5","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1109\/MSP.2010.937498","volume":"27","author":"F Lyon","year":"2010","unstructured":"Lyon F (2010) Machine hearing: an emerging field. IEEE Signal Process Mag 27(5):131\u2013139","journal-title":"IEEE Signal Process Mag"},{"key":"1593_CR6","doi-asserted-by":"crossref","unstructured":"Cotton C (2011) Spectral vs. spectro-temporal features for acoustic event detection. In: The 2011 IEEE workshop on applications of signal processing to audio and acoustics, pp. 69-72. IEEE, New Paltz","DOI":"10.1109\/ASPAA.2011.6082331"},{"issue":"1","key":"1593_CR7","doi-asserted-by":"publisher","first-page":"195","DOI":"10.7150\/ijbs.29863","volume":"15","author":"Y Shi","year":"2019","unstructured":"Shi Y, Li Y, Cai M (2019) A lung sound category recognition method based on wavelet decomposition and BP neural network. 
Int J Biol Sci 15(1):195","journal-title":"Int J Biol Sci"},{"key":"1593_CR8","doi-asserted-by":"crossref","unstructured":"Wang C, Wang F, He W (2006) Environmental sound classification using hybrid SVM\/KNN classifier and MPEG-7 audio low-level descriptor. The 2006 IEEE international joint conference on neural network proceedings. IEEE, Vancouver, pp 1731\u20131735","DOI":"10.1109\/IJCNN.2006.246644"},{"issue":"6","key":"1593_CR9","doi-asserted-by":"publisher","first-page":"1216","DOI":"10.1109\/TASLP.2017.2690570","volume":"25","author":"V Bisot","year":"2017","unstructured":"Bisot V, Serizel R, Essid S (2017) Feature learning with matrix factorization applied to acoustic scene classification. IEEE\/ACM Trans Audio Speech Language Process 25(6):1216\u20131229","journal-title":"IEEE\/ACM Trans Audio Speech Language Process"},{"key":"1593_CR10","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1016\/j.eswa.2017.12.015","volume":"96","author":"Y Zhang","year":"2018","unstructured":"Zhang Y, Wang Y, Zhou G (2018) Multi-kernel extreme learning machine for EEG classification in brain-computer interfaces. Expert Syst Appl 96:302\u2013310","journal-title":"Expert Syst Appl"},{"key":"1593_CR11","unstructured":"Bond R, Hoeffler A, Temple W (2001) GMM estimation of empirical growth models. Available at SSRN 290522"},{"issue":"1","key":"1593_CR12","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1080\/01431160412331269698","volume":"26","author":"M Pal","year":"2005","unstructured":"Pal M (2005) Random forest classifier for remote sensing classification. Int J Remote Sens 26(1):217\u2013222","journal-title":"Int J Remote Sens"},{"key":"1593_CR13","unstructured":"Tokozume Y, Ushiku Y (2017) Learning from between-class examples for deep sound recognition. arxiv preprint arxiv:1711.10282"},{"key":"1593_CR14","doi-asserted-by":"crossref","unstructured":"Dai W, Dai C, Qu S, Li J, Das S (2017) Very deep convolutional neural networks for raw waveforms. 
In 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP), 421-425. IEEE","DOI":"10.1109\/ICASSP.2017.7952190"},{"key":"1593_CR15","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1016\/j.eswa.2019.06.040","volume":"136","author":"A Sajjad","year":"2019","unstructured":"Sajjad A, Patrick C, Alessandro K (2019) End-to-end environmental sound classification using a 1d convolutional neural network. Expert Syst Appl 136:252\u2013263","journal-title":"Expert Syst Appl"},{"key":"1593_CR16","unstructured":"Zhang H, Cisse M, Dauphin YN (2017) mixup: Beyond empirical risk minimization. arxiv preprint arxiv:1710.09412"},{"key":"1593_CR17","doi-asserted-by":"crossref","unstructured":"Zhang Z, Xu S, Cao S (2018) Deep convolutional neural network with mixup for environmental sound classification. Chinese conference on pattern recognition and computer vision (prcv). Springer International Publishing, Cham, pp 356\u2013367","DOI":"10.1007\/978-3-030-03335-4_31"},{"key":"1593_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.ecoinf.2020.101084","volume":"57","author":"L Nanni","year":"2020","unstructured":"Nanni L, Maguolo G (2020) Data augmentation approaches for improving animal audio classification. Eco Inform 57:101084","journal-title":"Eco Inform"},{"issue":"8","key":"1593_CR19","doi-asserted-by":"publisher","first-page":"6301","DOI":"10.1007\/s10462-022-10153-0","volume":"55","author":"A Madhu","year":"2022","unstructured":"Madhu A (2022) EnvGAN: a GAN-based augmentation to improve environmental sound classification. Artif Intell Rev 55(8):6301\u20136320","journal-title":"Artif Intell Rev"},{"key":"1593_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2020.107389","volume":"167","author":"Z Mushtaq","year":"2020","unstructured":"Mushtaq Z, Su SF (2020) Environmental sound classification using a regularized deep convolutional neural network with data augmentation. 
Appl Acoust 167:107389","journal-title":"Appl Acoust"},{"key":"1593_CR21","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2020.107581","volume":"172","author":"Z Mushtaq","year":"2021","unstructured":"Mushtaq Z, Su F, Tran V (2021) Spectral images based environmental sound classification using CNN with meaningful data augmentation. Appl Acoust 172:107581","journal-title":"Appl Acoust"},{"key":"1593_CR22","doi-asserted-by":"publisher","first-page":"896","DOI":"10.1016\/j.neucom.2020.08.069","volume":"453","author":"Z Zhang","year":"2021","unstructured":"Zhang Z, Xu S, Zhang S, Qiao T, Cao S (2021) Attention based convolutional recurrent neural network for environmental sound classification. Neurocomputing 453:896\u2013903","journal-title":"Neurocomputing"},{"key":"1593_CR23","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1016\/j.neucom.2021.06.031","volume":"460","author":"M Tripathi","year":"2021","unstructured":"Tripathi M, Mishra A (2021) Environment sound classification using an attention-based residual neural network. Neurocomputing 460:409\u2013423","journal-title":"Neurocomputing"},{"issue":"1","key":"1593_CR24","doi-asserted-by":"publisher","first-page":"21552","DOI":"10.1038\/s41598-021-01045-4","volume":"11","author":"W Mu","year":"2021","unstructured":"Mu W, Yin B, Huang X, Xu J, Du Z (2021) Environmental sound classification using temporal-frequency attention based convolutional neural network. Sci Rep 11(1):21552","journal-title":"Sci Rep"},{"issue":"5","key":"1593_CR25","doi-asserted-by":"publisher","first-page":"3416","DOI":"10.1109\/JIOT.2021.3098464","volume":"9","author":"B Wu","year":"2021","unstructured":"Wu B, Zhang P (2021) Environmental sound classification via time-frequency attention and framewise self-attention-based deep neural networks. 
IEEE Internet Things J 9(5):3416\u20133428","journal-title":"IEEE Internet Things J"},{"key":"1593_CR26","doi-asserted-by":"crossref","unstructured":"Wang Y, Feng C, Anderson DV (2021) A multi-channel temporal attention convolutional neural network model for environmental sound classification. In ICASSP 2021-2021 IEEE international conference on acoustics, speech and signal processing (ICASSP), 930-934. IEEE","DOI":"10.1109\/ICASSP39728.2021.9413498"},{"issue":"4","key":"1593_CR27","doi-asserted-by":"publisher","first-page":"5089","DOI":"10.1007\/s11042-021-11610-8","volume":"81","author":"L Gao","year":"2022","unstructured":"Gao L, Xu K, Wang H, Peng Y (2022) Multi-representation knowledge distillation for audio classification. Multimedia Tools Appl 81(4):5089\u20135112","journal-title":"Multimedia Tools Appl"},{"key":"1593_CR28","doi-asserted-by":"publisher","first-page":"1100","DOI":"10.1109\/TASLP.2023.3244507","volume":"31","author":"AM Tripathi","year":"2023","unstructured":"Tripathi AM, Pandey OJ (2023) Divide and distill: new outlooks on knowledge distillation for environmental sound classification. IEEE\/ACM Trans Audio Speech Language Process 31:1100\u20131113","journal-title":"IEEE\/ACM Trans Audio Speech Language Process"},{"key":"1593_CR29","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee JY, Kweon IS (2018) Cbam: Convolutional block attention module. In Proceedings of the European conference on computer vision (ECCV), 3-19","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1593_CR30","doi-asserted-by":"crossref","unstructured":"Tripathi M, Mishra A (2023) Sub-band contrastive learning-based knowledge distillation for sound classification. In ICASSP 2023-2023 IEEE international conference on acoustics, speech and signal processing (ICASSP), 1-5. 
IEEE","DOI":"10.1109\/ICASSP49357.2023.10095677"},{"key":"1593_CR31","doi-asserted-by":"crossref","unstructured":"Liao L, Zhang X, Li C (2019) Multi-path convolutional neural network based on rectangular kernel with path signature features for gesture recognition. In 2019 IEEE visual communications and image processing (VCIP), 1-4. IEEE","DOI":"10.1109\/VCIP47243.2019.8965816"},{"key":"1593_CR32","doi-asserted-by":"crossref","unstructured":"Guzhov A, Raue F, Hees J, Dengel A (2021) Esresnet: environmental sound classification based on visual domain models. In 2020 25th international conference on pattern recognition (ICPR), 4933-4940. IEEE","DOI":"10.1109\/ICPR48806.2021.9413035"},{"key":"1593_CR33","unstructured":"Chang Z, He R, Yu Y, Zhang Z, Bai G (2023) A two-stream convolution architecture for ESC based on audio feature distanglement. In Asian Conference on Machine Learning, 153-168. PMLR"},{"key":"1593_CR34","doi-asserted-by":"publisher","first-page":"5571","DOI":"10.1007\/s11042-017-5292-7","volume":"78","author":"M Badshah","year":"2019","unstructured":"Badshah M, Rahim N, Ullah N, Ahmad J (2019) Deep features-based speech emotion recognition for smart affective services. Multimedia Tools Appl 78:5571\u20135589","journal-title":"Multimedia Tools Appl"},{"key":"1593_CR35","doi-asserted-by":"publisher","first-page":"107923","DOI":"10.1016\/j.patcog.2021.107923","volume":"116","author":"Z Chen","year":"2021","unstructured":"Chen Z, Xu TB, Liao W (2021) SNAP: shaping neural architectures progressively via information density criterion. Pattern Recogn 116:107923","journal-title":"Pattern Recogn"},{"key":"1593_CR36","doi-asserted-by":"publisher","first-page":"1287","DOI":"10.1007\/s11265-021-01702-x","volume":"93","author":"C Liu","year":"2021","unstructured":"Liu C, Hong F, Feng H, Zhai Y, Chen Y (2021) Environmental sound classification based on stacked concatenated DNN using aggregated features. 
J Signal Process Syst 93:1287\u20131299","journal-title":"J Signal Process Syst"},{"issue":"12","key":"1593_CR37","doi-asserted-by":"publisher","first-page":"5988","DOI":"10.3390\/app12125988","volume":"12","author":"J Guo","year":"2022","unstructured":"Guo J, Li C, Sun Z, Li J, Wang P (2022) A deep attention model for environmental sound classification from multi-feature data. Appl Sci 12(12):5988","journal-title":"Appl Sci"},{"key":"1593_CR38","doi-asserted-by":"publisher","first-page":"109025","DOI":"10.1016\/j.patcog.2022.109025","volume":"133","author":"M Mohaimenuzzaman","year":"2023","unstructured":"Mohaimenuzzaman M, Bergmeir C, West I, Meyer B (2023) Environmental sound classification on the edge: a pipeline for deep acoustic networks on extremely resource-constrained devices. Pattern Recogn 133:109025","journal-title":"Pattern Recogn"},{"issue":"5","key":"1593_CR39","doi-asserted-by":"publisher","first-page":"1045","DOI":"10.3390\/sym15051045","volume":"15","author":"M Huang","year":"2023","unstructured":"Huang M, Wang M, Liu X, Kan R, Qiu H (2023) Environmental sound classification framework based on L-mHP features and SE-ResNet50 network model. Symmetry 15(5):1045","journal-title":"Symmetry"},{"key":"1593_CR40","doi-asserted-by":"publisher","first-page":"27044","DOI":"10.1007\/s10489-023-04973-y","volume":"53","author":"K Presannakumar","year":"2023","unstructured":"Presannakumar K, Mohamed A (2023) Source identification of weak audio signals using attention based convolutional neural network. Appl Intell 53:27044\u201327059","journal-title":"Appl Intell"},{"key":"1593_CR41","doi-asserted-by":"publisher","first-page":"104170","DOI":"10.1016\/j.dsp.2023.104170","volume":"141","author":"S Dong","year":"2023","unstructured":"Dong S, Xia Z, Pan X, Yu T (2023) Environmental sound classification based on improved compact bilinear attention network. 
Digital Signal Processing 141:104170","journal-title":"Digital Signal Processing"},{"key":"1593_CR42","doi-asserted-by":"publisher","first-page":"110181","DOI":"10.1016\/j.apacoust.2024.110181","volume":"225","author":"M Mahyub","year":"2024","unstructured":"Mahyub M, Souza LS, Batalo B, Fukui K (2024) Signal latent subspace: a new representation for environmental sound classification. Appl Acoust 225:110181","journal-title":"Appl Acoust"},{"key":"1593_CR43","doi-asserted-by":"publisher","first-page":"109759","DOI":"10.1016\/j.apacoust.2023.109759","volume":"216","author":"A Ashurov","year":"2024","unstructured":"Ashurov A, Yi Z, Liu H, Yu Z, Li M (2024) Concatenation-based pre-trained convolutional neural networks using attention mechanism for environmental sound classification. Appl Acoust 216:109759","journal-title":"Appl Acoust"}],"container-title":["Pattern Analysis and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10044-025-01593-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10044-025-01593-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10044-025-01593-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T10:38:43Z","timestamp":1773484723000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10044-025-01593-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,27]]},"references-count":43,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["1593"],"URL":"https:\/\/doi.org\/10.1007\/s10044-025-01593-9","relation":{},"ISSN":["1433-7541","1433-755X"],"issn-type":[{"value":"
1433-7541","type":"print"},{"value":"1433-755X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,27]]},"assertion":[{"value":"18 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This manuscript does not involve any research related to humans or animals.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}],"article-number":"15"}}