{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T14:15:55Z","timestamp":1740147355738,"version":"3.37.3"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,3,26]],"date-time":"2024-03-26T00:00:00Z","timestamp":1711411200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,26]],"date-time":"2024-03-26T00:00:00Z","timestamp":1711411200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2023A04J1051"],"award-info":[{"award-number":["2023A04J1051"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272172","2023A1515012920"],"award-info":[{"award-number":["62272172","2023A1515012920"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SOCA"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s11761-024-00384-0","type":"journal-article","created":{"date-parts":[[2024,3,26]],"date-time":"2024-03-26T03:01:58Z","timestamp":1711422118000},"page":"145-152","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A reweighting method for speech recognition with imbalanced data of Mandarin and sub-dialects"],"prefix":"10.1007","volume":"18","author":[{"given":"Jiaju","family":"Wu","sequence":"first","affiliation":[]},{"given":"Zhengchang","family":"Wen","sequence":"additional","affiliation":[]},{"given":"Haitian","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Hanjing","family":"Su","sequence":"additional","affiliation":[]},{"given":"Fei","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Huan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yi","family":"Ding","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8564-7289","authenticated-orcid":false,"given":"Qingyao","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,26]]},"reference":[{"key":"384_CR1","unstructured":"Tang Z, Wang D, Xu Y, Sun J, Lei X, Zhao S, Wen C, Tan X, Xie C, Zhou S, Yan R, Lv C, Han Y, Zou W, Li X (2021) KeSpeech: an open source speech dataset of mandarin and its eight subdialects. https:\/\/openreview.net\/forum?id=b3Zoeq2sCLq"},{"key":"384_CR2","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP (2002) Smote: synthetic minority over-sampling technique. J Artif Intell Res 16:321\u2013357","journal-title":"J Artif Intell Res"},{"key":"384_CR3","doi-asserted-by":"crossref","unstructured":"Huang C, Li Y, Loy CC, Tang X (2016) Learning deep representation for imbalanced classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5375\u20135384","DOI":"10.1109\/CVPR.2016.580"},{"key":"384_CR4","unstructured":"Kang B, Xie S, Rohrbach M, Yan Z, Gordo A, Feng J, Kalantidis Y (2019) Decoupling representation and classifier for long-tailed recognition. arXiv preprint arXiv:1910.09217"},{"key":"384_CR5","doi-asserted-by":"crossref","unstructured":"Cui Y, Jia M, Lin T-Y, Song Y, Belongie S (2019) Class-balanced loss based on effective number of samples. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9268\u20139277","DOI":"10.1109\/CVPR.2019.00949"},{"key":"384_CR6","doi-asserted-by":"crossref","unstructured":"Liu Z, Miao Z, Zhan X, Wang J, Gong B, Yu SX (2019) Large-scale long-tailed recognition in an open world. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2537\u20132546","DOI":"10.1109\/CVPR.2019.00264"},{"issue":"1","key":"384_CR7","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/MASSP.1986.1165342","volume":"3","author":"L Rabiner","year":"1986","unstructured":"Rabiner L, Juang B (1986) An introduction to hidden Markov models. IEEE Assp Mag 3(1):4\u201316","journal-title":"IEEE Assp Mag"},{"issue":"2","key":"384_CR8","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"LR Rabiner","year":"1989","unstructured":"Rabiner LR (1989) A tutorial on hidden Markov models and selected applications in speech recognition. Proc IEEE 77(2):257\u2013286","journal-title":"Proc IEEE"},{"issue":"10","key":"384_CR9","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid O, Mohamed A-R, Jiang H, Deng L, Penn G, Yu D (2014) Convolutional neural networks for speech recognition. IEEE\/ACM Trans Audio Speech Lang Process 22(10):1533\u20131545","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"384_CR10","doi-asserted-by":"crossref","unstructured":"Han W, Zhang Z, Zhang Y, Yu J, Chiu C-C, Qin J, Gulati A, Pang R, Wu Y (2020) Contextnet: improving convolutional neural networks for automatic speech recognition with global context. arXiv preprint arXiv:2005.03191","DOI":"10.21437\/Interspeech.2020-2059"},{"issue":"2","key":"384_CR11","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1007\/s11761-022-00340-w","volume":"16","author":"Y Hao","year":"2022","unstructured":"Hao Y, Wu J, Huang X, Zhang Z, Liu F, Wu Q (2022) Speaker extraction network with attention mechanism for speech dialogue system. SOCA 16(2):111\u2013119","journal-title":"SOCA"},{"key":"384_CR12","doi-asserted-by":"crossref","unstructured":"Miao Y, Gowayyed M, Metze F (2015) EESEN: end-to-end speech recognition using deep RNN models and WFST-based decoding. In: 2015 IEEE workshop on automatic speech recognition and understanding (ASRU). IEEE, pp 167\u2013174","DOI":"10.1109\/ASRU.2015.7404790"},{"issue":"4","key":"384_CR13","doi-asserted-by":"publisher","first-page":"235","DOI":"10.2478\/jaiscr-2019-0006","volume":"9","author":"A Shewalkar","year":"2019","unstructured":"Shewalkar A, Nyavanandi D, Ludwig SA (2019) Performance evaluation of deep neural networks applied to speech recognition: RNN, LSTM and GRU. J Artif Intell Soft Comput Res 9(4):235\u2013245","journal-title":"J Artif Intell Soft Comput Res"},{"key":"384_CR14","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, vol 30"},{"key":"384_CR15","doi-asserted-by":"crossref","unstructured":"Watanabe S, Hori T, Karita S, Hayashi T, Nishitoba J, Unno Y, Soplin NEY, Heymann J, Wiesner M, Chen N et al (2018) Espnet: end-to-end speech processing toolkit. arXiv preprint arXiv:1804.00015","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"384_CR16","doi-asserted-by":"crossref","unstructured":"Dong L, Xu S, Xu B (2018) Speech-transformer: a no-recurrence sequence-to-sequence model for speech recognition. In: 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 5884\u20135888","DOI":"10.1109\/ICASSP.2018.8462506"},{"key":"384_CR17","doi-asserted-by":"crossref","unstructured":"Wang Y, Mohamed A, Le D, Liu C, Xiao A, Mahadeokar J, Huang H, Tjandra A, Zhang X, Zhang F et al (2020) Transformer-based acoustic modeling for hybrid speech recognition. In: ICASSP 2020-2020 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 6874\u20136878","DOI":"10.1109\/ICASSP40776.2020.9054345"},{"key":"384_CR18","doi-asserted-by":"crossref","unstructured":"Chan W, Jaitly N, Le Q, Vinyals, O (2016) Listen, attend and spell: A neural network for large vocabulary conversational speech recognition. In: 2016 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 4960\u20134964","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"384_CR19","doi-asserted-by":"crossref","unstructured":"Gulati A, Qin J, Chiu C-C, Parmar N, Zhang Y, Yu J, Han W, Wang S, Zhang Z, Wu Y et al (2020) Conformer: convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"384_CR20","doi-asserted-by":"crossref","unstructured":"Yao Z, Wu D, Wang X, Zhang B, Yu F, Yang C, Peng Z, Chen X, Xie L, Lei X (2021) Wenet: production oriented streaming and non-streaming end-to-end speech recognition toolkit. In: Proc Interspeech, Brno, Czech Republic. IEEE","DOI":"10.21437\/Interspeech.2021-1983"},{"key":"384_CR21","unstructured":"Amodei D, Ananthanarayanan S, Anubhai R, Bai J, Battenberg E, Case C, Casper J, Catanzaro B, Cheng Q, Chen G et al (2016) Deep speech 2: end-to-end speech recognition in English and mandarin. In: International conference on machine learning. PMLR, pp 173\u2013182"},{"key":"384_CR22","doi-asserted-by":"crossref","unstructured":"Hannun A, Lee A, Xu Q, Collobert R (2019) Sequence-to-sequence speech recognition with time-depth separable convolutions. arXiv preprint arXiv:1904.02619","DOI":"10.21437\/Interspeech.2019-2460"},{"key":"384_CR23","doi-asserted-by":"crossref","unstructured":"He Y, Sainath TN, Prabhavalkar R, McGraw I, Alvarez R, Zhao D, Rybach D, Kannan A, Wu Y, Pang R et al (2019) Streaming end-to-end speech recognition for mobile devices. In: ICASSP 2019-2019 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 6381\u20136385","DOI":"10.1109\/ICASSP.2019.8682336"},{"issue":"9","key":"384_CR24","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He H, Garcia EA (2009) Learning from imbalanced data. IEEE Trans Knowl Data Eng 21(9):1263\u20131284","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"7","key":"384_CR25","doi-asserted-by":"publisher","first-page":"2983","DOI":"10.1109\/JBHI.2022.3162748","volume":"26","author":"P Liu","year":"2022","unstructured":"Liu P, Zheng G (2022) Handling imbalanced data: uncertainty-guided virtual adversarial training with batch nuclear-norm optimization for semi-supervised medical image classification. IEEE J Biomed Health Inform 26(7):2983\u20132994","journal-title":"IEEE J Biomed Health Inform"},{"key":"384_CR26","doi-asserted-by":"crossref","unstructured":"Shamsudin H, Yusof UK, Jayalakshmi A, Khalid MNA (2020) Combining oversampling and undersampling techniques for imbalanced classification: a comparative study using credit card fraudulent transaction dataset. In: 2020 IEEE 16th international conference on control & automation (ICCA). IEEE, pp 803\u2013808","DOI":"10.1109\/ICCA51439.2020.9264517"},{"key":"384_CR27","doi-asserted-by":"crossref","unstructured":"Zhao L, Shang Z, Tan J, Zhou M, Zhang M, Gu D, Zhang T, Tang YY (2022) Siamese networks with an online reweighted example for imbalanced data learning. Pattern Recogn 132:108947","DOI":"10.1016\/j.patcog.2022.108947"},{"key":"384_CR28","doi-asserted-by":"crossref","unstructured":"Kannan A, Datta A, Sainath TN, Weinstein E, Ramabhadran B, Wu Y, Bapna A, Chen Z, Lee S (2019) Large-scale multilingual speech recognition with a streaming end-to-end model. arXiv preprint arXiv:1909.05330","DOI":"10.21437\/Interspeech.2019-2858"},{"key":"384_CR29","unstructured":"Soky K, Li S, Mimura M, Chu C, Kawahara T (2021) On the use of speaker information for automatic speech recognition in speaker-imbalanced corpora. In: 2021 Asia-Pacific signal and information processing association annual summit and conference (APSIPA ASC). IEEE, pp 433\u2013437"},{"key":"384_CR30","doi-asserted-by":"crossref","unstructured":"Winata GI, Wang G, Xiong C, Hoi S (2020) Adapt-and-adjust: overcoming the long-tail problem of multilingual speech recognition. arXiv preprint arXiv:2012.01687","DOI":"10.21437\/Interspeech.2021-1390"}],"container-title":["Service Oriented Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11761-024-00384-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11761-024-00384-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11761-024-00384-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,7]],"date-time":"2024-05-07T08:35:43Z","timestamp":1715070943000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11761-024-00384-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,26]]},"references-count":30,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["384"],"URL":"https:\/\/doi.org\/10.1007\/s11761-024-00384-0","relation":{},"ISSN":["1863-2386","1863-2394"],"issn-type":[{"type":"print","value":"1863-2386"},{"type":"electronic","value":"1863-2394"}],"subject":[],"published":{"date-parts":[[2024,3,26]]},"assertion":[{"value":"15 March 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 January 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 January 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 March 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no potential conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}