{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T23:00:27Z","timestamp":1773615627045,"version":"3.50.1"},"reference-count":29,"publisher":"Allerton Press","issue":"4","license":[{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Aut. Control Comp. Sci."],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.3103\/s0146411623040119","type":"journal-article","created":{"date-parts":[[2023,8,27]],"date-time":"2023-08-27T07:01:49Z","timestamp":1693119709000},"page":"371-379","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Environmental Sound Classification Based on Attention Feature Fusion and Improved Residual Network"],"prefix":"10.3103","volume":"57","author":[{"family":"Jinfang Zeng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuxing","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mengjiao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"1627","published-online":{"date-parts":[[2023,8,27]]},"reference":[{"key":"7598_CR1","doi-asserted-by":"publisher","unstructured":"Baum, E., Harper, M., Alicea, R., and Ordonez, C., Sound identification for fire-fighting mobile robots, Second IEEE Int. Conf. on Robotic Computing (IRC), Laguna Hills, Calif., 2018, IEEE, 2018, pp. 79\u201386. https:\/\/doi.org\/10.1109\/IRC.2018.00020","DOI":"10.1109\/IRC.2018.00020"},{"key":"7598_CR2","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1109\/tits.2015.2470216","volume":"17","author":"P. Foggia","year":"2016","unstructured":"Foggia, P., Petkov, N., Saggese, A., Strisciuglio, N., and Vento, M., Audio surveillance of roads: A system for detecting anomalous sounds, IEEE Trans. Intell. Transp. Syst., 2016, vol. 17, no. 1, pp. 279\u2013288. https:\/\/doi.org\/10.1109\/tits.2015.2470216","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"7598_CR3","doi-asserted-by":"publisher","unstructured":"Li, H., Ishikawa, S., Zhao, Q., Ebana, M., Yamamoto, H., and Huang, J., Robot navigation and sound based position identification, IEEE Conf. on Systems, Man and Cybernetics, Montreal, 2007, IEEE, 2007, pp. 2449\u20132454. https:\/\/doi.org\/10.1109\/ICSMC.2007.4413757","DOI":"10.1109\/ICSMC.2007.4413757"},{"key":"7598_CR4","unstructured":"Vacher, M., Istrate, D., Besacier, L., Serignat, J., and Castelli, E., Sound detection and classification for medical telesurvey, 2nd Conf. on Biomedical Engineering, Innsbruck, Austria, 2004, Calgary: ACTA Press, 2004, pp.\u00a0395\u2013398. https:\/\/hal.science\/hal-01088243."},{"key":"7598_CR5","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1016\/j.specom.2011.11.004","volume":"54","author":"M. Sahidullah","year":"2012","unstructured":"Sahidullah, M. and Saha, G., Design, analysis and experimental evaluation of block based transformation in MFCC computation for speaker recognition, Speech Commun., 2012, vol. 54, no. 4, pp. 543\u2013565. https:\/\/doi.org\/10.1016\/j.specom.2011.11.004","journal-title":"Speech Commun."},{"key":"7598_CR6","doi-asserted-by":"publisher","first-page":"1684","DOI":"10.1109\/tmm.2012.2199972","volume":"14","author":"X. Valero","year":"2012","unstructured":"Valero, X. and Alias, F., Gammatone cepstral coefficients: Biologically inspired features for non-speech audio classification, IEEE Trans. Multimedia, 2012, vol. 14, no. 6, pp. 1684\u20131689. https:\/\/doi.org\/10.1109\/tmm.2012.2199972","journal-title":"IEEE Trans. Multimedia"},{"key":"7598_CR7","doi-asserted-by":"publisher","unstructured":"Chachada, S. and Kuo, C.-C.J., Environmental sound recognition: A survey, 2013 Asia-Pacific Signal and Information Processing Assoc. Annu. Summit and Conf., Kaohsiung, Taiwan, 2013, IEEE, 2013. https:\/\/doi.org\/10.1109\/apsipa.2013.6694338","DOI":"10.1109\/apsipa.2013.6694338"},{"key":"7598_CR8","doi-asserted-by":"publisher","first-page":"2048","DOI":"10.1016\/j.procs.2017.08.250","volume":"112","author":"V. Boddapati","year":"2017","unstructured":"Boddapati, V., Petef, A., Rasmusson, J., and Lundberg, L., Classifying environmental sounds using image recognition networks, Procedia Comput. Sci., 2017, vol. 112, pp. 2048\u20132056. https:\/\/doi.org\/10.1016\/j.procs.2017.08.250","journal-title":"Procedia Comput. Sci."},{"key":"7598_CR9","doi-asserted-by":"publisher","first-page":"66529","DOI":"10.1109\/access.2020.2984903","volume":"8","author":"F. Demir","year":"2020","unstructured":"Demir, F., Abdullah, D.A., and Sengur, A., A new deep CNN model for environmental sound classification, IEEE Access, 2020, vol. 8, pp. 66529\u201366537. https:\/\/doi.org\/10.1109\/access.2020.2984903","journal-title":"IEEE Access"},{"key":"7598_CR10","doi-asserted-by":"publisher","unstructured":"Zhang, H., McLoughlin, I., and Song, Ya., Robust sound event recognition using convolutional neural networks, 2015 IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP), South Brisbane, Australia, 2015, IEEE, 2015, pp. 559\u2013563. https:\/\/doi.org\/10.1109\/icassp.2015.7178031","DOI":"10.1109\/icassp.2015.7178031"},{"key":"7598_CR11","doi-asserted-by":"publisher","unstructured":"He, K., Zhang, X., Ren, S., and Sun, J., Deep residual learning for image recognition, 2016 IEEE Conf. on Computer Vision and Pattern Recognition (CVPR), Las Vegas, 2016, IEEE, 2016, pp. 770\u2013778. https:\/\/doi.org\/10.1109\/cvpr.2016.90","DOI":"10.1109\/cvpr.2016.90"},{"key":"7598_CR12","doi-asserted-by":"publisher","unstructured":"Hendrycks, D. and Gimpel, K., Gaussian error linear units (gelus), 2016.https:\/\/doi.org\/10.48550\/arXiv.1606.08415","DOI":"10.48550\/arXiv.1606.08415"},{"key":"7598_CR13","first-page":"933","volume":"70","author":"Ya.N. Dauphin","year":"2017","unstructured":"Dauphin, Ya.N., Fan, A., Auli, M., and Grangier, D., Language modeling with gated convolutional networks, Proc. Mach. Learn. Res., 2017, vol. 70, pp. 933\u2013941.","journal-title":"Proc. Mach. Learn. Res."},{"key":"7598_CR14","doi-asserted-by":"publisher","first-page":"3604","DOI":"10.21437\/Interspeech.2019-3019","volume":"2019","author":"X. Li","year":"2019","unstructured":"Li, X., Chebiyyam, V., and Kirchhoff, K., Multi-stream network with temporal attention for environmental sound classification, Proc. Interspeech \n               2019, 2019, pp. 3604\u20133608. https:\/\/doi.org\/10.21437\/Interspeech.2019-3019","journal-title":"Proc. Interspeech"},{"key":"7598_CR15","doi-asserted-by":"publisher","first-page":"130327","DOI":"10.1109\/access.2019.2939495","volume":"7","author":"Z. Zhang","year":"2019","unstructured":"Zhang, Z., Xu, S., Zhang, S., Qiao, T., and Cao, S., Learning attentive representations for environmental sound classification, IEEE Access, 2019, vol. 7, pp. 130327\u2013130339. https:\/\/doi.org\/10.1109\/access.2019.2939495","journal-title":"IEEE Access"},{"key":"7598_CR16","doi-asserted-by":"publisher","unstructured":"Wang, Yo., Feng, C., and Anderson, D., A multi-channel temporal attention convolutional neural network model for environmental sound classification, ICASSP 2021 - 2021 IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2021, pp. 930\u2013934. https:\/\/doi.org\/10.1109\/icassp39728.2021.9413498","DOI":"10.1109\/icassp39728.2021.9413498"},{"key":"7598_CR17","doi-asserted-by":"publisher","unstructured":"Zhou, L., Zhou, Y., Qi, X., Hu, J., Lam, T.L., and Xu, Ya., Feature pyramid attention based residual neural network for environmental sound classification, 2022. https:\/\/doi.org\/10.48550\/arXiv.2205.14411","DOI":"10.48550\/arXiv.2205.14411"},{"key":"7598_CR18","doi-asserted-by":"publisher","first-page":"571","DOI":"10.21437\/Interspeech.2021-698","volume":"2021","author":"Yu. Gong","year":"2021","unstructured":"Gong, Yu., Chung, Yu-A., and Glass, J., AST: Audio spectrogram transformer, Proc. Interspeech \n               2021, 2021, pp. 571\u2013575. https:\/\/doi.org\/10.21437\/Interspeech.2021-698","journal-title":"Proc. Interspeech"},{"key":"7598_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-31654-9_23","volume-title":"Attention based convolutional recurrent neural network for environmental sound classification, Pattern Recognition and Computer Vision","author":"Z. Zhang","year":"2019","unstructured":"Zhang, Z., Xu, S., Qiao, T., Zhang, S., and Cao, S., Attention based convolutional recurrent neural network for environmental sound classification, Pattern Recognition and Computer Vision, Lin, Z., Wang, L., Yang, J., Eds., Lecture Notes in Computer Science, vol. 11857, Cham: Springer, 2019, pp. 261\u2013271. https:\/\/doi.org\/10.1007\/978-3-030-31654-9_23"},{"key":"7598_CR20","volume-title":"Acoustic scene classification using parallel combination of LSTM and CNN","author":"S. Bae","year":"2016","unstructured":"Bae, S., Choi, I., and Kim, N., Acoustic scene classification using parallel combination of LSTM and CNN, Detection Classification of Acoustic Scenes and Events, Budapest, 2016, pp. 11\u201315."},{"key":"7598_CR21","doi-asserted-by":"publisher","first-page":"311","DOI":"10.3103\/S0146411621040106","volume":"55","author":"Yu. Zhang","year":"2021","unstructured":"Zhang, Yu., Zeng, J., Li, Yo., and Chen, D., Convolutional neural network-gated recurrent unit neural network with feature fusion for environmental sound classification, Autom. Control Comput. Sci, 2021, vol. 55, pp. 311\u2013318. https:\/\/doi.org\/10.3103\/S0146411621040106","journal-title":"Autom. Control Comput. Sci"},{"key":"7598_CR22","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1016\/j.patrec.2022.07.012","volume":"161","author":"S. Verbitskiy","year":"2022","unstructured":"Verbitskiy, S., Berikov, V., and Vyshegorodtsev, V., ERANNs: Efficient residual audio neural networks for audio pattern recognition, Pattern Recognit. Lett., 2022, vol. 161, pp. 38\u201344. https:\/\/doi.org\/10.1016\/j.patrec.2022.07.012","journal-title":"Pattern Recognit. Lett."},{"key":"7598_CR23","doi-asserted-by":"publisher","first-page":"1186","DOI":"10.21437\/interspeech.2020-1303","volume":"2020","author":"J. Sharma","year":"2020","unstructured":"Sharma, J., Granmo, O., and Goodwin, M., Environment sound classification using multiple feature channels and attention based deep convolutional neural network, Proc. Interspeech 2020, Shanghai, 2020, ISCA, 2020, pp.\u00a01186\u20131190. https:\/\/doi.org\/10.21437\/interspeech.2020-1303","journal-title":"ISCA"},{"key":"7598_CR24","doi-asserted-by":"publisher","first-page":"1733","DOI":"10.3390\/s19071733","volume":"19","author":"Yu. Su","year":"2019","unstructured":"Su, Yu., Zhang, K., Wang, J., and Madani, K., Environment sound classification using a two-stream CNN based on decision-level fusion, Sensors, 2019, vol. 19, no. 7, p. 1733. https:\/\/doi.org\/10.3390\/s19071733","journal-title":"Sensors"},{"key":"7598_CR25","doi-asserted-by":"publisher","first-page":"5988","DOI":"10.3390\/app12125988","volume":"12","author":"J. Guo","year":"2022","unstructured":"Guo, J., Li, C., Sun, Z., Li, J., and Wang, P., A deep attention model for environmental sound classification from multi-feature data, Appl. Sci., 2022, vol. 12, no. 12, p. 5988. https:\/\/doi.org\/10.3390\/app12125988","journal-title":"Appl. Sci."},{"key":"7598_CR26","doi-asserted-by":"publisher","unstructured":"Zhu, B., Wang, C., Liu, F., Lei, J., Huang, Z., Peng, Yu., and Li, F., Learning environmental sounds with multi-scale convolutional neural network, 2018 Int. Joint Conf. on Neural Networks (IJCNN), Rio de Janeiro, 2018, IEEE, 2018, pp. 1\u20138. https:\/\/doi.org\/10.1109\/ijcnn.2018.8489641","DOI":"10.1109\/ijcnn.2018.8489641"},{"key":"7598_CR27","doi-asserted-by":"publisher","unstructured":"Dai, Yi., Gieseke, F., Oehmcke, S., Wu, Yi., and Barnard, K., Attentional feature fusion, 2021 IEEE Winter Conference on Applications of Computer Vision (WACV), Waikoloa, Hawaii, 2021, IEEE, 2021, pp. 3560\u20133569. https:\/\/doi.org\/10.1109\/wacv48630.2021.00360","DOI":"10.1109\/wacv48630.2021.00360"},{"key":"7598_CR28","doi-asserted-by":"publisher","unstructured":"Guzhov, A., Raue, F., Hees, J., and Dengel, A., Audioclip: Extending clip to image, text and audio, 2022 IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP), Singapore, 2022, IEEE, 2022, pp. 976\u2013980. https:\/\/doi.org\/10.1109\/icassp43922.2022.9747631","DOI":"10.1109\/icassp43922.2022.9747631"},{"key":"7598_CR29","doi-asserted-by":"publisher","first-page":"821","DOI":"10.21437\/interspeech.2020-1219","volume":"2020","author":"H. Wang","year":"2020","unstructured":"Wang, H., Zou, Yu., Chong, D., and Wang, W., Environmental sound classification with parallel temporal-spectral attention, Proc. Interspeech 2020, Shanghai, 2020, ISCA, 2020, pp. 821\u2013825. https:\/\/doi.org\/10.21437\/interspeech.2020-1219","journal-title":"ISCA"}],"container-title":["Automatic Control and Computer Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.3103\/S0146411623040119.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.3103\/S0146411623040119","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.3103\/S0146411623040119.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T22:02:28Z","timestamp":1773612148000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.3103\/S0146411623040119"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8]]},"references-count":29,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["7598"],"URL":"https:\/\/doi.org\/10.3103\/s0146411623040119","relation":{},"ISSN":["0146-4116","1558-108X"],"issn-type":[{"value":"0146-4116","type":"print"},{"value":"1558-108X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8]]},"assertion":[{"value":"10 June 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 October 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 October 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 August 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare that they have no conflicts of interest.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"CONFLICT OF INTEREST"}}]}}