{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:26:10Z","timestamp":1740122770999,"version":"3.37.3"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T00:00:00Z","timestamp":1667433600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T00:00:00Z","timestamp":1667433600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62071302"],"award-info":[{"award-number":["62071302"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2022,12]]},"DOI":"10.1007\/s10772-022-10010-z","type":"journal-article","created":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T16:03:55Z","timestamp":1667491435000},"page":"987-995","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Universal and accent-discriminative encoders for conformer-based accent-invariant speech recognition"],"prefix":"10.1007","volume":"25","author":[{"given":"Xuefei","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0924-408X","authenticated-orcid":false,"given":"Yanhua","family":"Long","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongxing","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,11,3]]},"reference":[{"key":"10010_CR1","unstructured":"Ardila, R., Branson, M., Davis, K., Henretty, M., Kohler, M., Meyer, J., Morais, R., Saunders, L., Tyers, F.M., & Weber, G. (2020). Common voice: A massively-multilingual speech corpus. In Proceedings of LREC (pp. 4218\u20134222)."},{"key":"10010_CR2","doi-asserted-by":"crossref","unstructured":"Bahdanau, D., Chorowski, J., Serdyuk, D., Brakel, P., & Bengio, Y. (2016). End-to-end attention-based large vocabulary speech recognition. In Proceedings of ICASSP (pp. 4945\u20134949)","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"10010_CR4","doi-asserted-by":"crossref","unstructured":"Chen, M., Yang, Z., Liang, J., Li, Y., & Liu, W. (2015). Improving deep neural networks based multi-accent Mandarin speech recognition using I-vectors and accent-specific top layer. In Proceedings of interspeech (pp. 3620\u20133624)","DOI":"10.21437\/Interspeech.2015-718"},{"key":"10010_CR3","doi-asserted-by":"crossref","unstructured":"Chen, Y. C., Yang, Z., Yeh, C. F., Jain, M., & Seltzer, M. L. (2020). AIPNet: Generative adversarial pre-training of accent-invariant networks for end-to-end speech recognition. In Proceedings of ICASSP (pp. 6879\u20136983).","DOI":"10.1109\/ICASSP40776.2020.9053098"},{"key":"10010_CR5","doi-asserted-by":"crossref","unstructured":"Chiu, C. C., Sainath, T. N., Wu, Y., Prabhavalkar, R., Nguyen, P., Chen, Z., Kannan, A., Weiss, R. J., Rao, K., Gonina, E., & Jaitly, N. (2018). State-of-the-art speech recognition with sequence-to-sequence models. In Proceedings of ICASSP (pp. 4774\u20134778)","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"10010_CR6","doi-asserted-by":"crossref","unstructured":"Das, A., Kumar, K., & Wu, J. (2021). Multi-dialect speech recognition in english using attention on ensemble of experts. In Proceedings of ICASSP (pp. 6244\u20136248)","DOI":"10.1109\/ICASSP39728.2021.9413952"},{"key":"10010_CR7","doi-asserted-by":"crossref","unstructured":"Gong, X., Lu, Y., Zhou, Z., & Qian, Y. (2021). Layer-wise fast adaptation for end-to-end multi-accent speech recognition. In Proceedings of interspeech (pp. 1274\u20131278)","DOI":"10.21437\/Interspeech.2021-1075"},{"key":"10010_CR8","unstructured":"Gotmare, A., Keskar, N. S., Xiong, C., & Socher, R. (2019). A closer look at deep learning heuristics: Learning rate restarts, warmup and distillation. In International conference on learning representations."},{"key":"10010_CR9","doi-asserted-by":"crossref","unstructured":"Gulati, A., Qin, J., Chiu, C. C., Parmar, N., Zhang, Y., Yu, J., Han, W., Wang, S., Zhang, Z., Wu, Y., & Pang, R. (2020). Conformer: Convolution-augmented transformer for speech recognition. In Proceedings of interspeech (pp. 5036\u20135040)","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"10010_CR10","doi-asserted-by":"crossref","unstructured":"Hu, H., Yang, X., Raeesy, Z., Guo, J., Keskin, G., Arsikere, H., Rastrow, A., Stolcke, A., & Maas, R. (2021). Redat: Accent-invariant representation for end-to-end asr by domain adversarial training with relabeling. In Proceedings of ICASSP (pp. 6408\u20136412).","DOI":"10.1109\/ICASSP39728.2021.9414291"},{"issue":"1","key":"10010_CR11","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1561\/116.00000045","volume":"11","author":"Ryo Imaizumi","year":"2022","unstructured":"Imaizumi, R., Masumura, R., Shiota, S., & Kiya, H. (2022). End-to-end Japanese multi-dialect speech recognition and dialect identification with multi-task learning. APSIPA Transactions on Signal and Information Processing, 11(1), 4.","journal-title":"APSIPA Transactions on Signal and Information Processing"},{"key":"10010_CR12","doi-asserted-by":"crossref","unstructured":"Jain, A., Upreti, M. & Jyothi, P. (2018). Improved accented speech recognition using accent embeddings and multi-task learning. In Proceedings of interspeech (pp. 2454\u20132458)","DOI":"10.21437\/Interspeech.2018-1864"},{"key":"10010_CR14","doi-asserted-by":"crossref","unstructured":"Karita, S., Chen, N., Hayashi, T., Hori, T., Inaguma, H., Jiang, Z., Someki, M., Soplin, N. E. Y., Yamamoto, R., Wang, X., & Watanabe, S. (2019a). A comparative study on transformer vs RNN in speech applications. In Proceedings of ASRU (pp. 449-456)","DOI":"10.1109\/ASRU46091.2019.9003750"},{"key":"10010_CR13","doi-asserted-by":"crossref","unstructured":"Karita, S., Enrique, N., Soplin, Y., & Watanabe, S. (2019b). Improving transformer-based end-to-end speech recognition with connectionist temporal classification and language model integration. In Proceedings of interspeech (pp. 1408\u20131412).","DOI":"10.21437\/Interspeech.2019-1938"},{"key":"10010_CR15","unstructured":"Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. In International conference on learning representations."},{"key":"10010_CR17","doi-asserted-by":"crossref","unstructured":"Li, B., Sainath, T. N., Sim, K. C., Bacchiani, M., Weinstein, E., Nguyen, P., Chen, Z., Wu, Y., & Rao, K. (2018). Multi-dialect speech recognition with a single sequence-to-sequence model. In Proceedings of ICASSP (pp. 4749\u20134753)","DOI":"10.1109\/ICASSP.2018.8461886"},{"issue":"1","key":"10010_CR16","first-page":"8","volume":"11","author":"Jinyu Li","year":"2020","unstructured":"Li, J. (2020). Recent advances in end-to-end automatic speech recognition. APSIPA Transactions on Signal and Information Processing, 11(1), 8.","journal-title":"APSIPA Transactions on Signal and Information Processing"},{"key":"10010_CR18","doi-asserted-by":"crossref","unstructured":"Li, S., Ouyang, B., Liao, D., Xia, S., Li, L., & Hong, Q. (2021). End-to-end multi-accent speech recognition with unsupervised accent modelling. In Proceedings of ICASSP (pp. 6418\u20136422).","DOI":"10.1109\/ICASSP39728.2021.9414833"},{"key":"10010_CR19","doi-asserted-by":"crossref","unstructured":"Miao, H., Cheng, G., Gao, C., Zhang, P., & Yan, Y. (2020). Transformer-based online CTC\/attention end-to-end speech recognition architecture. In Proceedings of ICASSP (pp. 6084\u20136088)","DOI":"10.1109\/ICASSP40776.2020.9053165"},{"key":"10010_CR20","doi-asserted-by":"crossref","unstructured":"Padi, B., Mohan, A., & Ganapathy, S. (2019). Attention based hybrid I-vector BLSTM model for language recognition. In Proceedings of interspeech (pp. 1263\u20131267)","DOI":"10.21437\/Interspeech.2019-2371"},{"key":"10010_CR21","doi-asserted-by":"crossref","unstructured":"Rao, K. & Sak, H. (2017). Multi-accent speech recognition with hierarchical grapheme based models. In Proceedings of ICASSP (pp. 4815\u20134819)","DOI":"10.1109\/ICASSP.2017.7953071"},{"key":"10010_CR22","doi-asserted-by":"crossref","unstructured":"Shi, X., Yu, F., Lu, Y., Liang, Y., Feng, Q., Wang, D., Qian, Y., & Xie, L. (2020). The accented English speech recognition challenge 2020: Open datasets, tracks, baselines, results and methods. In Proceedings of ICASSP (pp. 6918\u20136922).","DOI":"10.1109\/ICASSP39728.2021.9413386"},{"key":"10010_CR23","doi-asserted-by":"crossref","unstructured":"Shor, J., Emanuel, D., Lang, O., Tuval, O., Brenner, M., Cattiau, J., Vieira, F., McNally, M., Charbonneau, T., Nollstadt, M., & Hassidim, A. (2019). Personalizing ASR for dysarthric and accented speech with limited data. In Proceedings of interspeech (pp. 784\u2013788)","DOI":"10.21437\/Interspeech.2019-1427"},{"key":"10010_CR24","unstructured":"Taku, K. (2018). Subword regularization: Improving neural network translation models with multiple subword candidates. In Proceedings of ACL (pp. 66\u201375)."},{"key":"10010_CR25","doi-asserted-by":"crossref","unstructured":"Tanaka, T., Masumura, R., Moriya, T., Oba, T., & Aono, Y. (2019). A joint end-to-end and DNN-HMM hybrid automatic speech recognition system with transferring sharable knowledge. In Proceedings of interspeech (pp. 2210\u20132214)","DOI":"10.21437\/Interspeech.2019-2263"},{"key":"10010_CR26","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, \u0141., & Polosukhin, I. (2017). Attention is all you need. In Proceedings of advances in neural information processing systems (pp. 5998\u20136008) (2017)."},{"key":"10010_CR28","doi-asserted-by":"crossref","unstructured":"Watanabe, S., Hori, T., Karita, S., Hayashi, T., Nishitoba, J., Unno, Y., Soplin, N. E. Y., Heymann, J., Wiesner, M., Chen, N., & Renduchintala, A. (2018). ESPnet: End-to-end speech processing toolkit. In Proceedings of interspeech (pp. 2207\u20132211).","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"10010_CR27","doi-asserted-by":"crossref","unstructured":"Watanabe, S., Hori, T., Kim, S., Hershey, J. R., & Hayashi, T. (2017). Hybrid CTC\/attention architecture for end-to-end speech recognition. In IEEE Journal of Selected Topics in Signal Processing, 11(8), 1240\u20131253.","DOI":"10.1109\/JSTSP.2017.2763455"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-022-10010-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-022-10010-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-022-10010-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T11:19:02Z","timestamp":1670843942000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-022-10010-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,3]]},"references-count":28,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,12]]}},"alternative-id":["10010"],"URL":"https:\/\/doi.org\/10.1007\/s10772-022-10010-z","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2022,11,3]]},"assertion":[{"value":"10 May 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 October 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 November 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}