{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,7]],"date-time":"2025-08-07T21:03:06Z","timestamp":1754600586826,"version":"3.40.5"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,2,9]],"date-time":"2025-02-09T00:00:00Z","timestamp":1739059200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,9]],"date-time":"2025-02-09T00:00:00Z","timestamp":1739059200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62006010"],"award-info":[{"award-number":["62006010"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"R&D Program of Beijing Municipal Education Commission","award":["KM202210005029"],"award-info":[{"award-number":["KM202210005029"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s00034-025-03013-z","type":"journal-article","created":{"date-parts":[[2025,2,9]],"date-time":"2025-02-09T18:35:56Z","timestamp":1739126156000},"page":"4279-4300","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Information-Preserving Multilayer CTC Loss for Speech Recognition"],"prefix":"10.1007","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5001-0587","authenticated-orcid":false,"given":"Xianhong","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Deyu","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenmeng","family":"Xiong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,9]]},"reference":[{"key":"3013_CR1","first-page":"12449","volume":"33","author":"A Baevski","year":"2020","unstructured":"A. Baevski, Y. Zhou, A. Mohamed, M. Auli, Wav2vec 2.0: a framework for self-supervised learning of speech representations. Adv. Neural Inf. Process. Syst. 33, 12449\u201312460 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"3013_CR2","doi-asserted-by":"crossref","unstructured":"D. Bahdanau, J. Chorowski, D. Serdyuk, P. Brakel, Y. Bengio, End-to-end attention-based large vocabulary speech recognition, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2016). pp. 4945\u20134949","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"3013_CR3","doi-asserted-by":"crossref","unstructured":"E. Battenberg, J. Chen, R. Child, A. Coates, Y.G.Y. Li, H. Liu, S. Satheesh, A. Sriram, Z. Zhu, Exploring neural transducers for end-to-end speech recognition, in 2017 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU) (IEEE, 2017), pp. 206\u2013213","DOI":"10.1109\/ASRU.2017.8268937"},{"key":"3013_CR4","doi-asserted-by":"crossref","unstructured":"H. Bu, J. Du, X. Na, B. Wu, H. Zheng, Aishell-1: an open-source mandarin speech corpus and a speech recognition baseline, in 2017 20th Conference of the Oriental Chapter of the International Coordinating Committee on Speech Databases and Speech I\/O Systems and Assessment (O-COCOSDA) (IEEE, 2017), pp. 1\u20135","DOI":"10.1109\/ICSDA.2017.8384449"},{"key":"3013_CR5","doi-asserted-by":"crossref","unstructured":"F.-J. Chang, J. Liu, M. Radfar, A. Mouchtaris, M. Omologo, A. Rastrow, S. Kunzmann, Context-aware transformer transducer for speech recognition, in 2021 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU) (IEEE, 2021). pp. 503\u2013510","DOI":"10.1109\/ASRU51503.2021.9687895"},{"key":"3013_CR6","doi-asserted-by":"crossref","unstructured":"C.-C. Chiu, T. N. Sainath, Y. Wu, R. Prabhavalkar, P. Nguyen, Z. Chen, A. Kannan, R.J. Weiss, K. Rao, E. Gonina, et al., State-of-the-art speech recognition with squence-to-sequence models, in Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Process (IEEE, 2018), pp. 4774\u20134778","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"3013_CR7","doi-asserted-by":"crossref","unstructured":"K. Deng, Z. Yang, S. Watanabe, Y. Higuchi, G. Cheng, P. Zhang, Improving non-autoregressive end-to-end speech recognition with pre-trained acoustic and language models, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2016), pp. 8522\u20138526","DOI":"10.1109\/ICASSP43922.2022.9746316"},{"key":"3013_CR8","doi-asserted-by":"crossref","unstructured":"S. Eom, E. Yoon, H.S. Yoon, C. Kim, M. Hasegawa-Johnson, C.D. Yoo, AdaMER-CTC: Connectionist temporal classification with adaptive maximum entropy regularization for automatic speech recognition, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2024), pp. 12707\u201312711","DOI":"10.1109\/ICASSP48485.2024.10446721"},{"key":"3013_CR9","doi-asserted-by":"crossref","unstructured":"R. Fan, W. Chu, P. Chang, J. Xiao, A. Alwan, An improved single step non-autoregressive transformer for automatic speech recognition. (2021) arXiv preprint arXiv:2106.09885","DOI":"10.21437\/Interspeech.2021-1955"},{"key":"3013_CR10","doi-asserted-by":"publisher","unstructured":"Y. Fujita, T. Komatsu, Y. Kida, Multi-sequence intermediate conditioning for CTC-based ASR (2022) https:\/\/doi.org\/10.48550\/arXiv.2204.00175","DOI":"10.48550\/arXiv.2204.00175"},{"key":"3013_CR11","doi-asserted-by":"crossref","unstructured":"X. Gong, Z. Zhou, Y. Qian, Knowledge transfer and distillation from autoregressive to non-autoregressive speech recognition. (2022) arXiv preprint arXiv:2207.10600","DOI":"10.21437\/Interspeech.2022-632"},{"key":"3013_CR12","unstructured":"A. Graves, N. Jaitly, Towards end-to-end speech recognition with recurrent neural networks, in International Conference on Machine Learning (PMLR, 2014), pp. 1764\u20131772"},{"key":"3013_CR13","doi-asserted-by":"crossref","unstructured":"A. Graves, S. Fern\u00e1ndez, F. Gomez, J. Schmidhuber, Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks, in Proceedings of the 23rd International conference on Machine Learning (2006), pp. 369\u2013376","DOI":"10.1145\/1143844.1143891"},{"key":"3013_CR14","doi-asserted-by":"crossref","unstructured":"A. Gulati, J. Qin, C.-C. Chiu, N. Parmar, Y. Zhang, J. Yu, W. Han, S. Wang, Z. Zhang, Y. Wu et al., Conformer: convolution-augmented transformer for speech recognition. (2020) arXiv preprint arXiv:2005.08100","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"3013_CR15","doi-asserted-by":"crossref","unstructured":"P. Guo, F. Boyer, X. Chang, T. Hayashi, Y. Higuchi, H. Inaguma, N. Kamo, C. Li, D. Garcia-Romero, J. Shi et al., Recent developments on espnet toolkit boosted by conformer, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2021), pp. 5874\u20135878","DOI":"10.1109\/ICASSP39728.2021.9414858"},{"key":"3013_CR16","doi-asserted-by":"crossref","unstructured":"H. Hadian, H. Sameti, D. Povey, S. Khudanpur, End-to-end speech recognition using lattice-free MMI, in Proceedings of Interspeech (2018), pp. 12\u201316","DOI":"10.21437\/Interspeech.2018-1423"},{"key":"3013_CR17","doi-asserted-by":"crossref","unstructured":"S. Han, Z. Lei, M. Xu, X. Na, Z. Huang, Enhancing CTC-based speech recognition with diverse modeling units, in Proceedings of Interspeech (2024), pp. 4583\u20134587","DOI":"10.21437\/Interspeech.2024-555"},{"key":"3013_CR18","unstructured":"A. Hannun, C. Case, J. Casper, B. Catanzaro, G. Diamos, E. Elsen, R. Prenger, S. Satheesh, S. Sengupta, A. Coates et al., Deep speech: scaling up end-to-end speech recognition (2014) arXiv preprint arXiv:1412.5567"},{"key":"3013_CR19","doi-asserted-by":"crossref","unstructured":"Y. Higuchi, K. Karube, T. Ogawa, T. Kobayashi, Hierarchical conditional end-to-end ASR with CTC and multi-granular subword units, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2022), pp. 7797\u20137801","DOI":"10.1109\/ICASSP43922.2022.9746580"},{"key":"3013_CR20","unstructured":"H. Inaguma, T. Kawahara, Alignment knowledge distillation for online streaming attention-based speech recognition, in IEEE\/ACM Transactions on Audio, Speech, and Language Processing (2021)"},{"key":"3013_CR21","doi-asserted-by":"publisher","first-page":"1013","DOI":"10.1007\/s11265-017-1292-0","volume":"90","author":"J Kang","year":"2018","unstructured":"J. Kang, W.-Q. Zhang, W.-W. Liu, J. Liu, M.T. Johnson, Lattice based transcription loss for end-to-end speech recognition. J. Signal Process. Syst. 90, 1013\u20131023 (2018)","journal-title":"J. Signal Process. Syst."},{"key":"3013_CR22","first-page":"9361","volume":"35","author":"S Kim","year":"2022","unstructured":"S. Kim, A. Gholami, A. Shaw, N. Lee, K. Mangalam, J. Malik, M.W. Mahoney, K. Keutzer, Squeezeformer: an efficient transformer for automatic speech recognition. In Adv. Neural Inf. Process. Syst. 35, 9361\u20139373 (2022)","journal-title":"In Adv. Neural Inf. Process. Syst."},{"key":"3013_CR23","doi-asserted-by":"crossref","unstructured":"S. Kriman, S. Beliaev, B. Ginsburg, J. Huang, O. Kuchaiev, V. Lavrukhin, R. Leary, J. Li, Y. Zhang, Quartznet: deep automatic speech recognition with 1d time-channel separable convolutions, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2019), pp. 6124\u20136128","DOI":"10.1109\/ICASSP40776.2020.9053889"},{"issue":"14","key":"3013_CR24","doi-asserted-by":"publisher","first-page":"5381","DOI":"10.3390\/s22145381","volume":"22","author":"GW Lee","year":"2022","unstructured":"G.W. Lee, H.K. Kim, Two-step joint optimization with auxiliary loss function for noise-robust speech recognition. Sensors 22(14), 5381 (2022)","journal-title":"Sensors"},{"key":"3013_CR25","doi-asserted-by":"crossref","unstructured":"J. Lee, S. Watanabe, Intermediate loss regularization for CTC-based speech recognition, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2021), pp. 6224\u20136228","DOI":"10.1109\/ICASSP39728.2021.9414594"},{"key":"3013_CR26","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1007\/s00034-021-01786-7","volume":"41","author":"R Li","year":"2022","unstructured":"R. Li, F. Zhao, D. Pan, L. Dong, Speech enhancement based on binaural sound source localization and cosh measure wiener filtering. Circuits Syst. Signal Process. 41, 395\u2013424 (2022)","journal-title":"Circuits Syst. Signal Process."},{"key":"3013_CR27","doi-asserted-by":"crossref","unstructured":"Y. Li, L. Samarakoon, I. Fung, Improving non-autoregressive speech recognition with autoregressive pretraining, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2023), pp. 1\u20135","DOI":"10.1109\/ICASSP49357.2023.10096815"},{"key":"3013_CR28","doi-asserted-by":"crossref","unstructured":"D. Luo, X. Chen, M. Jia, C. Bao, Speech recognition method based on CTC multilayer loss, 2022 11th International Conference on Computing and Pattern Recognition (2022), pp. 392\u2013397","DOI":"10.1145\/3581807.3581864"},{"key":"3013_CR29","unstructured":"S. Majumdar, J. Balam, O. Hrinchuk, V. Lavrukhin, V. Noroozi, B. Ginsburg, Citrinet: closing the gap between non-autoregressive and autoregressive end-to-end models for automatic speech recognition (2021) arXiv preprint arXiv:2104.01721"},{"key":"3013_CR30","doi-asserted-by":"crossref","unstructured":"Y. Masuyama, X. Chang, S. Cornell, S. Watanabe, N. Ono, End-to-end integration of speech recognition, dereverberation, beamforming, and self-supervised learning representation, in 2022 IEEE Spoken Language Technology Workshop (SLT) (IEEE, 2023), pp. 260\u2013265","DOI":"10.1109\/SLT54892.2023.10023199"},{"key":"3013_CR31","doi-asserted-by":"crossref","unstructured":"Y. Miao, M. Gowayyed, F. Metze, EESEN: end-to-end speech recognition using deep RNN models and WFST-based decoding, in 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU) (IEEE, 2015), pp. 167\u2013174","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"3013_CR32","doi-asserted-by":"crossref","unstructured":"J. Nozaki, T. Komatsu, Relaxing the conditional independence assumption of CTC-based ASR by conditioning on intermediate predictions (2021) arXiv preprint arXiv:2104.02724","DOI":"10.21437\/Interspeech.2021-911"},{"issue":"1","key":"3013_CR33","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1007\/s00034-022-02126-z","volume":"42","author":"G Pamisetty","year":"2023","unstructured":"G. Pamisetty, R. Sri Rama Murty, Prosody-TTS: an end-to-end speech synthesis system with prosody control. Circuits Syst. Signal Process. 42(1), 361\u2013384 (2023)","journal-title":"Circuits Syst. Signal Process."},{"key":"3013_CR34","doi-asserted-by":"crossref","unstructured":"V. Panayotov, G. Chen, D. Povey, S. Khudanpur, Librispeech: an ASR corpus based on public domain audio books. , in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2015), pp. 5206\u20135210","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"3013_CR35","doi-asserted-by":"publisher","first-page":"2454","DOI":"10.1007\/s00034-023-02570-5","volume":"43","author":"B Paul","year":"2023","unstructured":"B. Paul, S. Phadikar, RAttSR: a novel low-cost reconstructed attention-based end-to-end speech recognizer. Circuits Syst. Signal Process. 43, 2454\u20132476 (2023)","journal-title":"Circuits Syst. Signal Process."},{"issue":"1","key":"3013_CR36","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13636-019-0161-0","volume":"2019","author":"C-X Qin","year":"2019","unstructured":"C.-X. Qin, W.-L. Zhang, D. Qu, A new joint CTC-attention-based speech recognition model with multi-level multi-head attention. EURASIP J. Audio Speech Music Process. 2019(1), 1\u201312 (2019)","journal-title":"EURASIP J. Audio Speech Music Process."},{"issue":"1","key":"3013_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13636-021-00233-4","volume":"2022","author":"S Qin","year":"2022","unstructured":"S. Qin, L. Wang, S. Li, J. Dang, L. Pan, Improving low-resource Tibetan end-to-end ASR by multilingual and multilevel unit modeling. EURASIP J. Audio Speech Music Process. 2022(1), 1\u201310 (2022)","journal-title":"EURASIP J. Audio Speech Music Process."},{"key":"3013_CR38","doi-asserted-by":"crossref","unstructured":"K. Rao, H. Sak, R. Prabhavalkar, Exploring architectures, data and units for streaming end-to-end speech recognition with RNN-transducer, in 2017 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU) (IEEE, 2017), pp. 193\u2013199","DOI":"10.1109\/ASRU.2017.8268935"},{"key":"3013_CR39","doi-asserted-by":"crossref","unstructured":"L. Rumberg, C. Gebauer, H. Ehlert, U. L\u00fcdtke, J. Ostermann, Improving phonetic transcriptions of children\u2019s speech by pronunciation modelling with constrained CTC-decoding, in Proceedings of Interspeech (2022), pp. 1357\u20131361","DOI":"10.21437\/Interspeech.2022-332"},{"key":"3013_CR40","doi-asserted-by":"crossref","unstructured":"R. Subramanian, P. Aruchamy, An effective speech emotion recognition model for multi-regional languages using threshold-based feature selection algorithm, in Circuits, Systems, and Signal Processing (pp. 1\u201330) (2023)","DOI":"10.1007\/s00034-023-02571-4"},{"key":"3013_CR41","doi-asserted-by":"publisher","first-page":"916","DOI":"10.1007\/s00034-023-02480-6","volume":"43","author":"J Sun","year":"2023","unstructured":"J. Sun, J. Zhu, J. Shao, Long-time speech emotion recognition using feature compensation and accentuation-based fusion. Circuits Syst. Signal Process. 43, 916\u2013940 (2023)","journal-title":"Circuits Syst. Signal Process."},{"key":"3013_CR42","doi-asserted-by":"crossref","unstructured":"T. Takagi, Y. Wakabayashi, A. Ogawa, N. Kitaoka, Text-only domain adaptation for CTC-based speech recognition through substitution of implicit linguistic information in the search space, in Proceedings of Interspeech (2024), pp. 287\u2013291","DOI":"10.21437\/Interspeech.2024-2222"},{"key":"3013_CR43","doi-asserted-by":"crossref","unstructured":"Z. Tian, H. Xiang, M. Li, F. Lin, K. Ding, and G. Wan. Peak-first CTC: Reducing the peak latency of CTC models by applying peak-first regularization, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2023). pp. 1\u20135","DOI":"10.1109\/ICASSP49357.2023.10095377"},{"issue":"9","key":"3013_CR44","doi-asserted-by":"publisher","first-page":"5283","DOI":"10.1007\/s00034-022-02008-4","volume":"41","author":"K Tripathi","year":"2022","unstructured":"K. Tripathi, K.S. Rao, Cyclegan-based speech mode transformation model for robust multilingual ASR. Circuits, Systems, and Signal Processing 41(9), 5283\u20135305 (2022)","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"3013_CR45","unstructured":"A. Vaswani, N. Shazeer, N. Parmar, J. Uszkoreit, L. Jones, A.N. Gomez, Attention is all you need, in Advances in Neural Information Processing Systems 30 (2017)"},{"key":"3013_CR46","unstructured":"Y. Wang, X. Deng, S. Pu, Z. Huang, Residual convolutional CTC networks for automatic speech recognition (2017) arXiv preprint arXiv:1702.07793"},{"key":"3013_CR47","doi-asserted-by":"crossref","unstructured":"Y. Wang, R. Liu, F. Bao, H. Zhang, G. Gao, Alignment-learning based single-step decoding for accurate and fast non-autoregressive speech recognition, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2022), pp. 8292\u20138296","DOI":"10.1109\/ICASSP43922.2022.9746227"},{"issue":"4","key":"3013_CR48","doi-asserted-by":"publisher","first-page":"927","DOI":"10.1109\/TASL.2010.2070495","volume":"19","author":"Q Wu","year":"2022","unstructured":"Q. Wu, L. Zhang, G. Shi, Robust multifactor speech feature extraction based on Gabor analysis. IEEE Trans. Audio Speech Lang. Process. 19(4), 927\u2013936 (2022)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"3013_CR49","doi-asserted-by":"crossref","unstructured":"Y. Yang, Y. Li, B. Du, Improving CTC-based ASR models with gated interlayer collaboration, in Proceeding IEEE International Conference on Acoustics, Speech, and Signal Processing (IEEE, 2023), pp. 1\u20135","DOI":"10.1109\/ICASSP49357.2023.10094820"},{"key":"3013_CR50","doi-asserted-by":"crossref","unstructured":"Z. Yao, D. Wu, X. Wang, B. Zhang, F. Yu, C. Yang, Z. Peng, X. Chen, L. Xie, X. Lei, WeNet: production oriented streaming and non-streaming end-to-end speech recognition toolkit, in Proceedings of Interspeech, Brno, Czech Republic (IEEE 2021)","DOI":"10.21437\/Interspeech.2021-1983"},{"key":"3013_CR51","unstructured":"Z. Yao, L. Guo, X. Yang, W. Kang, F. Kuang, Y. Yang, Z. Jin, L. Lin, D. Povey, Zipformer: a faster and better encoder for automatic speech recognition, in Twelfth International Conference on Learning Representations (ICLR) (2024) pp. 1\u201316"},{"key":"3013_CR52","unstructured":"B. Zhang, D. Wu, Z. Yao, X. Wang, F. Yu, C. Yang, L. Guo, Y. Hu, L. Xie, X. Lei, Unified streaming and non-streaming two-pass end-to-end model for speech recognition (2020) arXiv preprint arXiv:2012.05481"},{"key":"3013_CR53","doi-asserted-by":"crossref","unstructured":"B. Zhang, D. Wu, Z. Peng, X. Song, Z. Yao, H. Lv, L. Xie, C. Yang, F. Pan, J. Niu, WeNet 2.0: more productive end-to-end speech recognition toolkit (2022) arXiv preprint arXiv:2203.15455","DOI":"10.21437\/Interspeech.2022-483"},{"issue":"7","key":"3013_CR54","doi-asserted-by":"publisher","first-page":"4068","DOI":"10.1007\/s00034-022-01974-z","volume":"41","author":"Y Zhao","year":"2022","unstructured":"Y. Zhao, R. Togneri, V. Sreeram, Multi-task learning-based spoofing-robust automatic speaker verification system. Circuits Syst. Signal Process. 41(7), 4068\u20134089 (2022)","journal-title":"Circuits Syst. Signal Process."},{"key":"3013_CR55","first-page":"14549","volume":"35","author":"C Zheng","year":"2021","unstructured":"C. Zheng, X. Peng, Y. Zhang, S. Srinivasan, Y. Lu, Interactive speech and noise modeling for speech enhancement. Proc. AAAI Conf. Artif. Intell. 35, 14549\u201314557 (2021)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"3013_CR56","unstructured":"https:\/\/github.com\/SpeechColab\/Leaderboard"},{"key":"3013_CR57","unstructured":"https:\/\/pytorch.org\/audio\/0.12.1\/torchaudio.html"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-025-03013-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-025-03013-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-025-03013-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T20:10:48Z","timestamp":1747253448000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-025-03013-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,9]]},"references-count":57,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["3013"],"URL":"https:\/\/doi.org\/10.1007\/s00034-025-03013-z","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"type":"print","value":"0278-081X"},{"type":"electronic","value":"1531-5878"}],"subject":[],"published":{"date-parts":[[2025,2,9]]},"assertion":[{"value":"4 February 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 January 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 January 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 February 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}