{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T18:44:00Z","timestamp":1770749040207,"version":"3.50.0"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2025,7,18]],"date-time":"2025-07-18T00:00:00Z","timestamp":1752796800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,18]],"date-time":"2025-07-18T00:00:00Z","timestamp":1752796800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["51975003 and 51705001"],"award-info":[{"award-number":["51975003 and 51705001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s00034-025-03235-1","type":"journal-article","created":{"date-parts":[[2025,7,18]],"date-time":"2025-07-18T17:11:55Z","timestamp":1752858715000},"page":"9362-9384","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A Transformer-Based Neural Network for Acoustic Scene Classification Using Potent Self-attention"],"prefix":"10.1007","volume":"44","author":[{"given":"Ya-Hu","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Dong-Xu","family":"Lin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0604-2108","authenticated-orcid":false,"given":"Lin","family":"Geng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,18]]},"reference":[{"key":"3235_CR1","doi-asserted-by":"publisher","DOI":"10.3390\/app10062020","author":"J Abe\u00dfer","year":"2020","unstructured":"J. Abe\u00dfer, A review of deep learning based methods for acoustic scene classification. Appl. Sci. (2020). https:\/\/doi.org\/10.3390\/app10062020","journal-title":"Appl. Sci."},{"key":"3235_CR2","doi-asserted-by":"publisher","first-page":"881","DOI":"10.1121\/1.2750160","volume":"122","author":"JJ Aucouturier","year":"2007","unstructured":"J.J. Aucouturier, B. Defreville, F. Pachet, The bag-of-frames approach to audio pattern recognition: a sufficient model for urban soundscapes but not for polyphonic music. J. Acoust. Soc. Am. 122, 881\u2013891 (2007). https:\/\/doi.org\/10.1121\/1.2750160","journal-title":"J. Acoust. Soc. Am."},{"issue":"3","key":"3235_CR3","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1109\/MSP.2014.2326181","volume":"32","author":"D Barchiesi","year":"2015","unstructured":"D. Barchiesi, D. Giannoulis, D. Stowell, M. Plumbley, Acoustic scene classification: classifying environments from the sounds they produce. IEEE Signal Process. Mag. 32(3), 16\u201334 (2015). https:\/\/doi.org\/10.1109\/MSP.2014.2326181","journal-title":"IEEE Signal Process. Mag."},{"key":"3235_CR4","doi-asserted-by":"publisher","unstructured":"A.M. Basbug, M. Sert, Acoustic scene classification using spatial pyramid pooling with convolutional neural networks. In: Proceedings of 13th IEEE International Conference on Semantic Computing, California, USA, January 30\u2013February 1, 2019. https:\/\/doi.org\/10.1109\/ICOSC.2019.8665547","DOI":"10.1109\/ICOSC.2019.8665547"},{"key":"3235_CR5","doi-asserted-by":"publisher","unstructured":"V. Bisot, S. Essid, G. Richard, HOG and subband power distribution image features for acoustic scene classification. In: Proceedings of 23rd European Signal Processing Conference, Nice, France, August 31\u2013September 4, 2015. https:\/\/doi.org\/10.1109\/EUSIPCO.2015.7362477","DOI":"10.1109\/EUSIPCO.2015.7362477"},{"key":"3235_CR6","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1016\/j.matcom.2023.08.007","volume":"222","author":"Y Cao","year":"2024","unstructured":"Y. Cao, A. Chandrasekar, T. Radhika, V. Vijayakumar, Input-to-state stability of stochastic Markovian jump genetic regulatory networks. Math. Comput. Simul 222, 174\u2013187 (2024). https:\/\/doi.org\/10.1016\/j.matcom.2023.08.007","journal-title":"Math. Comput. Simul"},{"key":"3235_CR7","unstructured":"W. Cao, Y. Li, Q. Huang, Acoustic scene classification using lightweight ResNet with attention, Tech. rep. DCASE2021 Challenge, July 2021."},{"issue":"4","key":"3235_CR8","doi-asserted-by":"publisher","first-page":"373","DOI":"10.2478\/jaiscr-2024-0020","volume":"14","author":"Y Cao","year":"2024","unstructured":"Y. Cao, A.R. Subhashri, A. Chandrasekar, T. Radhika, K. Przybyszewski, Exponential state estimation for delayed competitive neural network via stochastic sampled-data control with Markov jump parameters under actuator failure. J. Artif. Intell. Soft Comput. 14(4), 373\u2013385 (2024). https:\/\/doi.org\/10.2478\/jaiscr-2024-0020","journal-title":"J. Artif. Intell. Soft Comput."},{"key":"3235_CR9","unstructured":"M. Cui, F. Kui, L. Guo, Consistency learning based acoustic scene classification with res-attention. Tech. rep., DCASE2021 Challenge, July 2021."},{"key":"3235_CR10","doi-asserted-by":"publisher","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, et al., An image is worth 16\u00d716 words: transformers for image recognition at scale. In: Proceedings of the 9th International Conference on Learning Representations, 2021. https:\/\/doi.org\/10.48550\/arXiv.2010.11929","DOI":"10.48550\/arXiv.2010.11929"},{"issue":"1","key":"3235_CR11","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1109\/TSA.2005.854103","volume":"14","author":"AJ Eronen","year":"2006","unstructured":"A.J. Eronen, V.T. Peltonen, J.T. Tuomi et al., Audio-based context recognition. IEEE Trans. Audio Speech Lang. Process. 14(1), 321\u2013329 (2006). https:\/\/doi.org\/10.1109\/TSA.2005.854103","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"3235_CR12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-51971-1_5","volume-title":"Artificial Intelligence and Bioinspired Computational Methods. CSOC 2020. Advances in Intelligent Systems and Computing","author":"HM Farghaly","year":"2020","unstructured":"H.M. Farghaly, A.A. Ali, T.A. El-Hafeez, Developing an efficient method for automatic threshold detection based on hybrid feature selection approach, in Artificial Intelligence and Bioinspired Computational Methods. CSOC 2020. Advances in Intelligent Systems and Computing. ed. by R. Silhavy (Springer, Cham, 2020). https:\/\/doi.org\/10.1007\/978-3-030-51971-1_5"},{"issue":"3","key":"3235_CR13","first-page":"39","volume":"164","author":"HM Farghaly","year":"2020","unstructured":"H.M. Farghaly, A.A. Ali, T.A. El-Hafeez, Building an effective and accurate associative classifier based on support vector machine. Sylwan 164(3), 39\u201356 (2020)","journal-title":"Sylwan"},{"key":"3235_CR14","unstructured":"E. Fonseca, R. Gong, D. Bogdanov, O. Slizovskaia, E. Gomez, X. Serra, Acoustic scene classification by ensembling gradient boosting machine and convolutional neural networks. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events Workshop, Munich, Germany, November 16\u201317, 2017. https:\/\/api.semanticscholar.org\/CorpusID:44972262"},{"key":"3235_CR15","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1186\/s40537-024-00944-3","volume":"11","author":"M Ghada","year":"2024","unstructured":"M. Ghada, M. Hamdi, A. El Hafeez, E. ElAraby Mohamed, Feature reduction for hepatocellular carcinoma prediction using machine learning algorithms. J. Big Data. 11, 88 (2024). https:\/\/doi.org\/10.1186\/s40537-024-00944-3","journal-title":"J. Big Data."},{"key":"3235_CR16","doi-asserted-by":"publisher","first-page":"1507","DOI":"10.1038\/s41598-024-51615-5","volume":"14","author":"E Hassan","year":"2024","unstructured":"E. Hassan, T. Abd El Hafeez, M.Y. Shams, Optimizing classification of diseases through language model analysis of symptoms. Sci. Rep. 14, 1507 (2024). https:\/\/doi.org\/10.1038\/s41598-024-51615-5","journal-title":"Sci. Rep."},{"key":"3235_CR17","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1186\/s40537-024-00985-8","volume":"11","author":"E Hassan","year":"2024","unstructured":"E. Hassan, S. Elbedwehy, M.Y. Shams, T. Abd El Hafeez, N. El Rashidy, Optimizing poultry audio signal classification with deep learning and burn layer fusion. J. Big Data. 11, 135 (2024). https:\/\/doi.org\/10.1186\/s40537-024-00985-8","journal-title":"J. Big Data."},{"key":"3235_CR18","unstructured":"Y. Jeong, S. Park, T. Lee, Trident ResNets with low-complexity for acoustic scene classification. Tech. rep., DCASE2021 Challenge, July 2021."},{"key":"3235_CR19","doi-asserted-by":"publisher","unstructured":"B. Kim, S. Yang, J. Kim, S. Chang, QTI submission to DCASE 2021: residual normalization for device-imbalanced acoustic scene classification with efficient design. (2022). https:\/\/doi.org\/10.48550\/arXiv.2206.13909","DOI":"10.48550\/arXiv.2206.13909"},{"key":"3235_CR20","doi-asserted-by":"publisher","first-page":"1987","DOI":"10.1109\/TASLP.2021.3082307","volume":"29","author":"K Koutini","year":"2021","unstructured":"K. Koutini, H. Eghbal-zadeh, G. Widmer, Receptive field regularization techniques for audio classification and tagging with deep convolution neural networks. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 1987\u20132000 (2021). https:\/\/doi.org\/10.1109\/TASLP.2021.3082307","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"3235_CR21","unstructured":"K. Koutini, S. Jan, G. Widmer, CPJKU submission to DCASE21: Cross-device audio scene classification with wide sparse frequency-damped CNNs, Tech. rep., DCASE2021 Challenge, July 2021."},{"key":"3235_CR22","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1016\/j.knosys.2016.01.029","volume":"98","author":"Y Leng","year":"2016","unstructured":"Y. Leng, C. Sun, X.Y. Xu, Q. Yuan, S.N. Xing, H.L. Wan, J.J. Wang, D.W. Li, Employing unlabeled data to improve the classification performance of SVM, and its application in audio event classification. Knowl. Based Syst. 98, 117\u2013129 (2016). https:\/\/doi.org\/10.1016\/j.knosys.2016.01.029","journal-title":"Knowl. Based Syst."},{"key":"3235_CR23","doi-asserted-by":"publisher","first-page":"105600","DOI":"10.1016\/j.knosys.2020.105600","volume":"195","author":"Y Leng","year":"2020","unstructured":"Y. Leng, W. Zhao, C. Lin, C.L. Sun, R.Y. Wang, Q. Yuan, D.W. Li, LDA-based data augmentation algorithm for acoustic scene classification. Knowl. Based Syst. 195, 105600 (2020). https:\/\/doi.org\/10.1016\/j.knosys.2020.105600","journal-title":"Knowl. Based Syst."},{"key":"3235_CR24","doi-asserted-by":"publisher","first-page":"110460","DOI":"10.1016\/j.knosys.2023.1104605","volume":"268","author":"Y Leng","year":"2023","unstructured":"Y. Leng, J. Zhuang, J. Pan, C. Sun, Multitask learning for acoustic scene classification with topic-based soft labels and a mutual attention mechanism. Knowl. Based Syst. 268, 110460 (2023). https:\/\/doi.org\/10.1016\/j.knosys.2023.1104605","journal-title":"Knowl. Based Syst."},{"key":"3235_CR25","unstructured":"Y. Li, X. Li, The SEIE-SCUT systems for IEEE AASP challenge on DCASE 2017: Deep learning techniques for audio representation and classification. Tech. rep. DCASE2017 challenge, September 2017."},{"key":"3235_CR26","doi-asserted-by":"publisher","unstructured":"S. Li, X. Lu, P. Shen, R. Takashima, T. Kawahara, H. Kawai, Incremental training and constructing the very deep convolution residual network acoustic models. In: Proceedings of IEEE Automatic Speech Recognition and Understanding Workshop, Okinawa, Japan, December 16\u201320, 2017. https:\/\/doi.org\/10.1109\/ASRU.2017.8268939","DOI":"10.1109\/ASRU.2017.8268939"},{"key":"3235_CR27","doi-asserted-by":"publisher","unstructured":"I. Loshchilov, F. Hutter, Decoupled weight decay regularization. (2017). https:\/\/doi.org\/10.48550\/arXiv.1711.05101","DOI":"10.48550\/arXiv.1711.05101"},{"issue":"7","key":"3235_CR28","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1109\/TSA.2002.804546","volume":"10","author":"L Lu","year":"2002","unstructured":"L. Lu, H. Zhang, H. Jiang, Content analysis for audio classification and segmentation. IEEE Trans. Speech Audio Process. 10(7), 504\u2013516 (2002). https:\/\/doi.org\/10.1109\/TSA.2002.804546","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"3235_CR29","doi-asserted-by":"publisher","first-page":"7035","DOI":"10.1007\/s00034-022-02107-2","volume":"41","author":"A Madhu","year":"2022","unstructured":"A. Madhu, K. Suresh, AtResNet: residual Atrous CNN with multi-scale feature representation for low complexity acoustic scene classification. Circuits Syst. Signal Process. 41, 7035\u20137056 (2022). https:\/\/doi.org\/10.1007\/s00034-022-02107-2","journal-title":"Circuits Syst. Signal Process."},{"key":"3235_CR30","unstructured":"T. Maka, Audio feature space analysis for acoustic scene classification. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events Workshop, Surrey, UK, November 19\u201320, 2018. https:\/\/api.semanticscholar.org\/CorpusID:245426612"},{"key":"3235_CR31","doi-asserted-by":"publisher","unstructured":"I. Mart\u00edn-Morat\u00f3, T. Heittola, A. Mesaros, T. Virtanen, Low-complexity acoustic scene classification for multi-device audio: analysis of DCASE 2021 challenge systems. In Proceedings of the Detection and Classification of Acoustic Scenes and Events 2021 Workshop, Online, November 15\u201319, 2021. In: Proceedings of the 6th Workshop on Detection and Classication of Acoustic Scenes and Events (DCASE 2021) (pp. 85-89). DCASE. https:\/\/doi.org\/10.5281\/zenodo.5770113","DOI":"10.5281\/zenodo.5770113"},{"key":"3235_CR32","doi-asserted-by":"crossref","unstructured":"M.D. McDonnell, W. Gao, Acoustic scene classification using deep residual networks with late fusion of separated high and low frequency paths. Tech. rep. DCASE2019 challenge, June 2019.","DOI":"10.1109\/ICASSP40776.2020.9053274"},{"key":"3235_CR33","doi-asserted-by":"publisher","unstructured":"A. Mesaros, T. Heittola, T. Virtanen, A multi-device dataset for urban acoustic scene classification. (2018). https:\/\/doi.org\/10.48550\/arXiv.1807.09840","DOI":"10.48550\/arXiv.1807.09840"},{"key":"3235_CR34","doi-asserted-by":"publisher","unstructured":"D. Misra, Mish: A self regularized non-monotonic activation function. (2020). https:\/\/doi.org\/10.48550\/arXiv.1908.08681","DOI":"10.48550\/arXiv.1908.08681"},{"key":"3235_CR35","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1186\/s12911-024-02682-1","volume":"24","author":"G Mostafa","year":"2024","unstructured":"G. Mostafa, H. Mahmoud, T. Abd El Hafeez, M.E. ElAraby, The power of deep learning in simplifying feature selection for hepatocellular carcinoma: a review. BMC Med. Inform. Decis. Mak. 24, 287 (2024). https:\/\/doi.org\/10.1186\/s12911-024-02682-1","journal-title":"BMC Med. Inform. Decis. Mak."},{"key":"3235_CR36","doi-asserted-by":"publisher","first-page":"2835","DOI":"10.1007\/s00521-023-09204-6","volume":"36","author":"A Omar","year":"2024","unstructured":"A. Omar, T. Abd El-Hafeez, Optimizing epileptic seizure recognition performance with feature scaling and dropout layers. Neural Comput. Appl. 36, 2835\u20132852 (2024). https:\/\/doi.org\/10.1007\/s00521-023-09204-6","journal-title":"Neural Comput. Appl."},{"key":"3235_CR37","doi-asserted-by":"publisher","unstructured":"H. Phan, O. Ch\u00e9n, L. Pham, P. Koch, M. Vos, I. Mcloughlin, A. Mertins, Spatio-temporal attention pooling for audio scene classification. In: Proceedings of Interspeech, Graz, Austria, September 15\u201319, 2019. https:\/\/doi.org\/10.48550\/arXiv.1904.03543","DOI":"10.48550\/arXiv.1904.03543"},{"key":"3235_CR38","doi-asserted-by":"publisher","unstructured":"H. Phan, P. Koch, F. Katzberg, M. Maass, R. Mazur, A. Mertins, Audio scene classification with deep recurrent neural networks. In: Proceedings of Interspeech, Stockholm, Sweden, August 20\u201324, 2017. https:\/\/doi.org\/10.48550\/arXiv.1703.04770","DOI":"10.48550\/arXiv.1703.04770"},{"key":"3235_CR39","unstructured":"M. Plata, Deep neural networks with supported clusters preclassification procedure for acoustic scene recognition. Tech. rep. DCASE2019 Challenge, June 2019."},{"key":"3235_CR40","doi-asserted-by":"publisher","unstructured":"R. Radhakrishnan, A. Divakaran, P. Smaragdis, Audio analysis for surveillance applications. In: Proceedings of IEEE Workshop on Applications of Signal Processing to Audio and Acoustics, New York, USA, October 16\u201319, 2005. https:\/\/doi.org\/10.1109\/ASPAA.2005.1540194","DOI":"10.1109\/ASPAA.2005.1540194"},{"issue":"1","key":"3235_CR41","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1109\/TASLP.2014.2375575","volume":"23","author":"A Rakotomamonjy","year":"2015","unstructured":"A. Rakotomamonjy, G. Gasso, Histogram of gradients of time-frequency representations for audio scene classification. IEEE\/ACM Trans. Audio Speech Lang. Process. 23(1), 142\u2013153 (2015). https:\/\/doi.org\/10.1109\/TASLP.2014.2375575","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"3235_CR42","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.apacoust.2018.12.006","volume":"148","author":"VS Roneel","year":"2019","unstructured":"V.S. Roneel, J.M. Tom, Acoustic event recognition using cochleagram image and convolutional neural networks. Appl. Acoust. 148, 62\u201366 (2019). https:\/\/doi.org\/10.1016\/j.apacoust.2018.12.006","journal-title":"Appl. Acoust."},{"key":"3235_CR43","doi-asserted-by":"publisher","unstructured":"B. Schilit, N. Adams, R. Want, Context-aware computing applications. In: Proceedings of 1st Workshop Mobile Computing Systems and Applications, California, USA, December 8\u20139, 1994. https:\/\/doi.org\/10.1109\/WMCSA.1994.16","DOI":"10.1109\/WMCSA.1994.16"},{"key":"3235_CR44","unstructured":"H. Seo, J. Park, Y. Park, Acoustic scene classification using various pre-processed features and convolution neural networks. Tech. rep., DCASE2019 Challenge, June 2019."},{"key":"3235_CR45","doi-asserted-by":"publisher","first-page":"123608","DOI":"10.1016\/j.eswa.2024.123608","volume":"249","author":"MY Shams","year":"2024","unstructured":"M.Y. Shams, T.A.E. Hafeez, E. Hassan, Acoustic data detection in large-scale emergency vehicle sirens and road noise dataset. Expert Syst. Appl. 249, 123608 (2024). https:\/\/doi.org\/10.1016\/j.eswa.2024.123608","journal-title":"Expert Syst. Appl."},{"key":"3235_CR46","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1007\/s00034-023-02478-0","volume":"43","author":"V Spoorthy","year":"2024","unstructured":"V. Spoorthy, S.G. Koolagudi, Bi-level acoustic scene classification using lightweight deep learning model. Circuits Syst. Signal Process 43, 388\u2013407 (2024). https:\/\/doi.org\/10.1007\/s00034-023-02478-0","journal-title":"Circuits Syst. Signal Process"},{"key":"3235_CR47","doi-asserted-by":"publisher","unstructured":"A. Vaswani, N. Shazeer, N. Parmar, et al, Attention is all you need. (2017). https:\/\/doi.org\/10.48550\/arXiv.1706.03762","DOI":"10.48550\/arXiv.1706.03762"},{"key":"3235_CR48","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2020.107502","volume":"170","author":"S Waldekar","year":"2020","unstructured":"S. Waldekar, G. Saha, Two-level fusion-based acoustic scene classification. Appl. Acoust. 170, 107502 (2020). https:\/\/doi.org\/10.1016\/j.apacoust.2020.107502","journal-title":"Appl. Acoust."},{"key":"3235_CR49","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2022.103450","volume":"123","author":"W Xie","year":"2022","unstructured":"W. Xie, Q.H. He, Z.T. Yu, Y.X. Li, Deep mutual attention network for acoustic scene classification. Digit. Signal Process. 123, 103450 (2022). https:\/\/doi.org\/10.1016\/j.dsp.2022.103450","journal-title":"Digit. Signal Process."},{"key":"3235_CR50","doi-asserted-by":"publisher","DOI":"10.1002\/9780470222867","author":"Y Xu","year":"2007","unstructured":"Y. Xu, W. Li, K. Lee, Intelligent wearable interfaces. John Wiley & Sons (2007). https:\/\/doi.org\/10.1002\/9780470222867","journal-title":"John Wiley & Sons"},{"key":"3235_CR51","doi-asserted-by":"publisher","unstructured":"L. Yang, X. Chen, L. Tao, X. Gu, Multi-scale fusion and channel weighted CNN for acoustic scene classification. In: Proceedings of 2nd International Conference on Signal Processing and Machine Learning, New York, USA, November 27\u201329, 2019. https:\/\/doi.org\/10.1145\/3372806.3372809","DOI":"10.1145\/3372806.3372809"},{"key":"3235_CR52","doi-asserted-by":"publisher","unstructured":"W. Yin, K. Kann, M. Yu, H. Sch\u00fctze, Comparative study of CNN and RNN for natural language processing. (2017). https:\/\/doi.org\/10.48550\/arXiv.1702.01923","DOI":"10.48550\/arXiv.1702.01923"},{"key":"3235_CR53","doi-asserted-by":"publisher","first-page":"950","DOI":"10.1109\/LSP.2020.2996085","volume":"27","author":"L Zhang","year":"2020","unstructured":"L. Zhang, J. Han, Z. Shi, Learning temporal relations from semantic neighbors for acoustic scene classification. IEEE Signal Process. Letters. 27, 950\u2013954 (2020). https:\/\/doi.org\/10.1109\/LSP.2020.2996085","journal-title":"IEEE Signal Process. Letters."},{"key":"3235_CR54","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2022.108819","volume":"195","author":"T Zhang","year":"2022","unstructured":"T. Zhang, J. Liang, G. Feng, Adaptive time-frequency feature resolution network for acoustic scene classification. Appl. Acoust. 195, 108819 (2022). https:\/\/doi.org\/10.1016\/j.apacoust.2022.108819","journal-title":"Appl. Acoust."},{"issue":"8","key":"3235_CR55","doi-asserted-by":"publisher","first-page":"1216","DOI":"10.1109\/TASLP.2019.2913091","volume":"27","author":"T Zhang","year":"2019","unstructured":"T. Zhang, J. Wu, Constrained learned feature extraction for acoustic scene classification. IEEE\/ACM Trans. Audio Speech Lang. Process. 27(8), 1216\u20131228 (2019). https:\/\/doi.org\/10.1109\/TASLP.2019.2913091","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"3235_CR56","doi-asserted-by":"publisher","unstructured":"T. Zhang, K. Zhang, J. Wu, Data independent sequence augmentation method for acoustic scene classification. In: Proceedings of Interspeech, Hyderabad, India, September 2\u20136, 2018. https:\/\/doi.org\/10.21437\/Interspeech.2018-1250","DOI":"10.21437\/Interspeech.2018-1250"},{"key":"3235_CR57","doi-asserted-by":"publisher","unstructured":"T. Zhang, K. Zhang, J. Wu, Temporal transformer networks for acoustic scene classification. In: Proceedings of Interspeech, Hyderabad, India, September 2\u20136, 2018. https:\/\/doi.org\/10.21437\/Interspeech.2018-1152","DOI":"10.21437\/Interspeech.2018-1152"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-025-03235-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-025-03235-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-025-03235-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,30]],"date-time":"2025-11-30T03:29:09Z","timestamp":1764473349000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-025-03235-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,18]]},"references-count":57,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["3235"],"URL":"https:\/\/doi.org\/10.1007\/s00034-025-03235-1","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,18]]},"assertion":[{"value":"18 September 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 June 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 June 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 July 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"No potential conflict of interest was reported by the authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}