{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T02:58:39Z","timestamp":1772333919524,"version":"3.50.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Research and Development Planning in Key Areas of Guangdong Province","award":["2021B0202070001"],"award-info":[{"award-number":["2021B0202070001"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-026-08338-3","type":"journal-article","created":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T02:39:02Z","timestamp":1772332742000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Fine-tuning Whisper for speech recognition in aquatic product inspection tasks"],"prefix":"10.1007","volume":"82","author":[{"given":"Ming","family":"Chen","sequence":"first","affiliation":[]},{"given":"Zhanwang","family":"Gu","sequence":"additional","affiliation":[]},{"given":"Wenjuan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Lina","family":"Pei","sequence":"additional","affiliation":[]},{"given":"Yan","family":"Ge","sequence":"additional","affiliation":[]},{"given":"Yibo","family":"Zou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,3,1]]},"reference":[{"issue":"9","key":"8338_CR1","doi-asserted-by":"publisher","first-page":"e29687","DOI":"10.1016\/j.heliyon.2024.e29687","volume":"10","author":"F Musavi","year":"2024","unstructured":"Musavi F, Hekmatshoar R, Fallahi M, Moradi A, Yazdani-Aval M (2024) Identifying and preventing human error in the sugar production process: a multi-stage approach using hta, hec and phea techniques. Heliyon 10(9):e29687","journal-title":"Heliyon"},{"issue":"12","key":"8338_CR2","doi-asserted-by":"publisher","first-page":"1936","DOI":"10.3390\/foods13121936","volume":"13","author":"L Zhang","year":"2024","unstructured":"Zhang L, Yang Q, Zhu Z (2024) The application of multi-parameter multi-modal technology integrating biological sensors and artificial intelligence in the rapid detection of food contaminants. Foods 13(12):1936","journal-title":"Foods"},{"key":"8338_CR3","doi-asserted-by":"publisher","first-page":"108225","DOI":"10.1016\/j.foodcont.2021.108225","volume":"129","author":"L Ni","year":"2021","unstructured":"Ni L, Chen D, Fu H, Xie Q, Lu Y, Wang X, Zhao Y, Chen L (2021) Residual levels of antimicrobial agents and heavy metals in 41 species of commonly consumed aquatic products in shanghai, china, and cumulative exposure risk to children and teenagers. Food Control 129:108225","journal-title":"Food Control"},{"issue":"3","key":"8338_CR4","first-page":"88","volume":"6","author":"J Vajpai","year":"2016","unstructured":"Vajpai J, Bora A (2016) Industrial applications of automatic speech recognition systems. Int J Eng Res Appl 6(3):88\u201395","journal-title":"Int J Eng Res Appl"},{"key":"8338_CR5","doi-asserted-by":"crossref","unstructured":"Tur G, Stolcke A, Voss L, Dowding J, Favre B, Fern\u00e1ndez R, Frampton M, Frandsen M, Frederickson C, Graciarena M, et al (2008) The calo meeting speech recognition and understanding system. In: 2008 IEEE Spoken Language Technology Workshop, pp. 69\u201372","DOI":"10.1109\/SLT.2008.4777842"},{"key":"8338_CR6","unstructured":"Tzoukermann E, Miller C (2018) Evaluating automatic speech recognition in translation. In: Proceedings of the 13th Conference of the Association for Machine Translation in the Americas (Volume 2: User Track), pp. 294\u2013302"},{"issue":"2","key":"8338_CR7","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1007\/s10772-023-10033-0","volume":"26","author":"M Dua","year":"2023","unstructured":"Dua M, Akanksha Dua S (2023) Noise robust automatic speech recognition: review and analysis. Int J Speech Technol 26(2):475\u2013519","journal-title":"Int J Speech Technol"},{"key":"8338_CR8","doi-asserted-by":"publisher","first-page":"131858","DOI":"10.1109\/ACCESS.2021.3112535","volume":"9","author":"S Alharbi","year":"2021","unstructured":"Alharbi S, Alrazgan M, Alrashed A, Alnomasi T, Almojel R, Alharbi R, Alharbi S, Alturki S, Alshehri F, Almojil M (2021) Automatic speech recognition: systematic literature review. Ieee Access 9:131858\u2013131876","journal-title":"Ieee Access"},{"key":"8338_CR9","doi-asserted-by":"crossref","unstructured":"Lee W, Kim S, Lee GG (2024) Enhancing dialogue speech recognition with robust contextual awareness via noise representation learning. arxiv:2408.06043","DOI":"10.18653\/v1\/2024.sigdial-1.30"},{"key":"8338_CR10","doi-asserted-by":"crossref","unstructured":"Weninger F, Sun Y, Park J, Willett D, Zhan P (2019) Deep learning based mandarin accent identification for accent robust asr. In: Proc. Interspeech, pp. 510\u2013514","DOI":"10.21437\/Interspeech.2019-2737"},{"key":"8338_CR11","unstructured":"Huang J, Kuchaiev O, O\u2019Neill P, Lavrukhin V, Li J, Flores A, Kucsko G, Ginsburg B (2020) Cross-language transfer learning, continuous learning, and domain adaptation for end-to-end automatic speech recognition. https:\/\/arxiv.org\/abs\/2005.04290"},{"issue":"11","key":"8338_CR12","first-page":"2579","volume":"9","author":"L Maaten","year":"2008","unstructured":"Maaten L, Hinton G (2008) Visualizing data using t-sne. J Mach Learn Res 9(11):2579","journal-title":"J Mach Learn Res"},{"key":"8338_CR13","unstructured":"Malinin A, Band N, Chesnokov G, Gal Y, Gales MJ, Noskov A, Ploskonosov A, Prokhorenkova L, Provilkov I, Raina V, et al (2021) Shifts: A dataset of real distributional shift across multiple large-scale tasks. https:\/\/arxiv.org\/abs\/2107.07455"},{"key":"8338_CR14","doi-asserted-by":"crossref","unstructured":"McCloskey M, Cohen NJ (1989) Catastrophic Interference in Connectionist Networks: The Sequential Learning Problem, vol. 24, pp. 109\u2013165. Academic Press, San Diego","DOI":"10.1016\/S0079-7421(08)60536-8"},{"key":"8338_CR15","volume-title":"Automatic speech recognition: The development of the SPHINX system","author":"K-F Lee","year":"1988","unstructured":"Lee K-F (1988) Automatic speech recognition: The development of the SPHINX system. Springer, New York"},{"issue":"2","key":"8338_CR16","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"LR Rabiner","year":"1989","unstructured":"Rabiner LR (1989) A tutorial on hidden markov models and selected applications in speech recognition. Proc IEEE 77(2):257\u2013286","journal-title":"Proc IEEE"},{"issue":"6","key":"8338_CR17","doi-asserted-by":"publisher","first-page":"1404","DOI":"10.1109\/TASSP.1985.1164727","volume":"33","author":"B-H Juang","year":"1985","unstructured":"Juang B-H, Rabiner L (1985) Mixture autoregressive hidden markov models for speech signals. IEEE Trans Acoust Speech Signal Process 33(6):1404\u20131413","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"issue":"1","key":"8338_CR18","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"GE Dahl","year":"2011","unstructured":"Dahl GE, Yu D, Deng L, Acero A (2011) Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition. IEEE Trans Audio Speech Lang Process 20(1):30\u201342","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"10","key":"8338_CR19","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid O, Mohamed A-R, Jiang H, Deng L, Penn G, Yu D (2014) Convolutional neural networks for speech recognition. IEEE\/ACM Trans Audio Speech Language Process 22(10):1533\u20131545","journal-title":"IEEE\/ACM Trans Audio Speech Language Process"},{"key":"8338_CR20","doi-asserted-by":"crossref","unstructured":"Graves A (2012) Sequence transduction with recurrent neural networks. arxiv:1211.3711","DOI":"10.1007\/978-3-642-24797-2"},{"key":"8338_CR21","doi-asserted-by":"crossref","unstructured":"Vinyals O, Ravuri SV, Povey D (2012) Revisiting recurrent neural networks for robust asr. In: 2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4085\u20134088","DOI":"10.1109\/ICASSP.2012.6288816"},{"key":"8338_CR22","doi-asserted-by":"crossref","unstructured":"Graves A, Mohamed A-R, Hinton G (2013) Speech recognition with deep recurrent neural networks. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 6645\u20136649","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"8338_CR23","doi-asserted-by":"crossref","unstructured":"Dong L, Xu S, Xu B (2018) Speech-transformer: a no-recurrence sequence-to-sequence model for speech recognition. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5884\u20135888","DOI":"10.1109\/ICASSP.2018.8462506"},{"key":"8338_CR24","doi-asserted-by":"crossref","unstructured":"Gulati A, Qin J, Chiu C-C, Parmar N, Zhang Y, Yu J, Han W, Wang S, Zhang Z, Wu Y, et al (2020) Conformer: Convolution-augmented transformer for speech recognition. arxiv:2005.08100","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"8338_CR25","doi-asserted-by":"crossref","unstructured":"Zhang Q, Lu H, Sak H, Tripathi A, McDermott E, Koo S, Kumar S (2020) Transformer transducer: A streamable speech recognition model with transformer encoders and rnn-t loss. In: ICASSP 2020\u20132020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7829\u20137833","DOI":"10.1109\/ICASSP40776.2020.9053896"},{"key":"8338_CR26","unstructured":"Amodei D, Ananthanarayanan S, Anubhai R, Bai J, Battenberg E, Case C, Casper J, Catanzaro B, Cheng Q, Chen G, et al (2016) Deep speech 2: End-to-end speech recognition in english and mandarin. In: International Conference on Machine Learning, pp. 173\u2013182"},{"key":"8338_CR27","first-page":"12449","volume":"33","author":"A Baevski","year":"2020","unstructured":"Baevski A, Zhou Y, Mohamed A, Auli M (2020) wav2vec 2.0: a framework for self-supervised learning of speech representations. Adv Neural Inf Process Syst 33:12449\u201312460","journal-title":"Adv Neural Inf Process Syst"},{"key":"8338_CR28","unstructured":"Radford A, Kim JW, Xu T, Brockman G, McLeavey C, Sutskever I (2023) Robust speech recognition via large-scale weak supervision. In: International Conference on Machine Learning, pp. 28492\u201328518"},{"key":"8338_CR29","doi-asserted-by":"publisher","first-page":"394","DOI":"10.1109\/TASLP.2022.3140552","volume":"30","author":"Y Qian","year":"2022","unstructured":"Qian Y, Zhou Z (2022) Optimizing data usage for low-resource speech recognition. IEEE\/ACM Trans Audio Speech Language Process 30:394\u2013403","journal-title":"IEEE\/ACM Trans Audio Speech Language Process"},{"key":"8338_CR30","doi-asserted-by":"crossref","unstructured":"Jain R, Barcovschi A, Yiwere M, Corcoran P, Cucu H (2023) Adaptation of whisper models to child speech recognition. arxiv:2307.13008","DOI":"10.21437\/Interspeech.2023-935"},{"key":"8338_CR31","doi-asserted-by":"crossref","unstructured":"Xu T, Huang K, Guo P, Zhou Y, Huang L, Xue H, Xie L (2024) Towards rehearsal-free multilingual asr: A lora-based case study on whisper. arxiv:2408.10680","DOI":"10.21437\/Interspeech.2024-1953"},{"key":"8338_CR32","unstructured":"Pillai LG, Manohar K, Raju BK, Sherly E (2024) Multistage fine-tuning strategies for automatic speech recognition in low-resource languages. arxiv:2411.04573"},{"key":"8338_CR33","doi-asserted-by":"crossref","unstructured":"Li J, Zhang W-Q (2024) Whisper-based transfer learning for alzheimer disease classification: Leveraging speech segments with full transcripts as prompts. In: ICASSP 2024\u20132024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 11211\u201311215","DOI":"10.1109\/ICASSP48485.2024.10448004"},{"issue":"2","key":"8338_CR34","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1007\/s10586-024-04839-z","volume":"28","author":"P Gupta","year":"2025","unstructured":"Gupta P, Raul S, Shoba S, Veeramani K (2025) Empowering healthcare with bieh-blockchain inter-operable electronic health record scheme. Clust Comput 28(2):142","journal-title":"Clust Comput"},{"issue":"1\u20132","key":"8338_CR35","first-page":"77","volume":"27","author":"E Manikandan","year":"2025","unstructured":"Manikandan E, Pravin CS, Kiruthika V, Gopikashree S, Chandan H, Karthigeyan KA, Shoba S (2025) Initial study on applicability of thermal imaging for microplastics identification in aquatic medium. J Optoelectron Adv Mater 27(1\u20132):77\u201382","journal-title":"J Optoelectron Adv Mater"},{"key":"8338_CR36","doi-asserted-by":"crossref","unstructured":"Zhao S, Ma B, Watcharasupat KN, Gan W-S (2022) Frcrn: Boosting feature representation using frequency recurrence for monaural speech enhancement. In: ICASSP 2022\u20132022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 9281\u20139285","DOI":"10.1109\/ICASSP43922.2022.9747578"},{"key":"8338_CR37","unstructured":"Yang Z, Pang T, Feng H, Wang H, Chen W, Zhu M, Liu Q (2024) Self-distillation bridges distribution gap in language model fine-tuning. arxiv:2402.13669v2 (2024)"},{"issue":"4","key":"8338_CR38","doi-asserted-by":"publisher","first-page":"853","DOI":"10.1007\/s11390-021-1119-0","volume":"38","author":"Y-G Xu","year":"2023","unstructured":"Xu Y-G, Qiu X-P, Zhou L-G, Huang X-J (2023) Improving bert fine-tuning via self-ensemble and self-distillation. J Comput Sci Technol 38(4):853\u2013866","journal-title":"J Comput Sci Technol"},{"key":"8338_CR39","unstructured":"M\u00fcller R, Kornblith S, Hinton GE (2019) When does label smoothing help? Advances in neural information processing systems 32"},{"key":"8338_CR40","unstructured":"Lukasik M, Bhojanapalli S, Menon A, Kumar S (2020) Does label smoothing mitigate label noise? In: International Conference on Machine Learning, pp. 6448\u20136458"},{"key":"8338_CR41","unstructured":"Houlsby N, Giurgiu A, Jastrzebski S, Morrone B, De Laroussilhe Q, Gesmundo A, Attariyan M, Gelly S (2019) Parameter-efficient transfer learning for nlp. In: International Conference on Machine Learning, pp. 2790\u20132799"},{"key":"8338_CR42","first-page":"136084","volume":"37","author":"Z Mai","year":"2024","unstructured":"Mai Z, Chowdhury A, Zhang P, Tu C-H, Chen H-Y, Pahuja V, Berger-Wolf T, Gao S, Stewart C, Su Y et al (2024) Fine-tuning is fine, if calibrated. Adv Neural Inf Process Syst 37:136084\u2013136119","journal-title":"Adv Neural Inf Process Syst"},{"key":"8338_CR43","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-026-08338-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-026-08338-3","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-026-08338-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T02:39:07Z","timestamp":1772332747000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-026-08338-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,1]]},"references-count":43,"journal-issue":{"issue":"4","published-online":{"date-parts":[[2026,3]]}},"alternative-id":["8338"],"URL":"https:\/\/doi.org\/10.1007\/s11227-026-08338-3","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,1]]},"assertion":[{"value":"6 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 February 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 March 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"211"}}