{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T15:28:54Z","timestamp":1777735734927,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,3,20]],"date-time":"2020-03-20T00:00:00Z","timestamp":1584662400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,3,20]]},"DOI":"10.1145\/3388818.3389159","type":"proceedings-article","created":{"date-parts":[[2020,5,22]],"date-time":"2020-05-22T23:44:48Z","timestamp":1590191088000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Improved BLSTM RNN Based Accent Speech Recognition Using Multi-task Learning and Accent Embeddings"],"prefix":"10.1145","author":[{"given":"Wenbi","family":"Rao","sequence":"first","affiliation":[{"name":"School of Computer and Technology, Wuhan University of Technology, Hubei Key Laboratory of Transportation Internet of Things, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ji","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer and Technology, Wuhan University of Technology, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianwei","family":"Wu","sequence":"additional","affiliation":[{"name":"Wuhan FiberHome Digital Technology Co., Ltd, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,5,18]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-405"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"W. Xiong J. Droppo X. Huang F. Seide M. Seltzer A. Stolcke D. Yu and G.Zweig \"Achieving human parity in conversational speech recognition \" arXiv preprint arXiv:1610.05256 2016.  W. Xiong J. Droppo X. Huang F. Seide M. Seltzer A. Stolcke D. Yu and G.Zweig \"Achieving human parity in conversational speech recognition \" arXiv preprint arXiv:1610.05256 2016.","DOI":"10.1109\/ICASSP.2017.7953159"},{"key":"e_1_3_2_1_3_1","volume-title":"Comparison of Acoustic Model Adaptation Techniques on Non-native Speech,\" in the Proceedings of the 2013 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)","author":"Wang Z.","year":"2013","unstructured":"Z. Wang , T. Schultz , and A. Waibel , \" Comparison of Acoustic Model Adaptation Techniques on Non-native Speech,\" in the Proceedings of the 2013 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) , 2013 . Z. Wang, T. Schultz, and A. Waibel, \"Comparison of Acoustic Model Adaptation Techniques on Non-native Speech,\" in the Proceedings of the 2013 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), 2013."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"e_1_3_2_1_5_1","volume-title":"IEEE Workshop on Spoken Language Technology","author":"Waters Austin","year":"2016","unstructured":"Austin Waters , Meysam Bastani , Mohamed G. Elfeky , Pedro Moreno , and Xavier Velez ,\"Towards acoustic model unification across dialects,\" in Proc . IEEE Workshop on Spoken Language Technology , 2016 . Austin Waters, Meysam Bastani, Mohamed G. Elfeky, Pedro Moreno, and Xavier Velez,\"Towards acoustic model unification across dialects,\" in Proc. IEEE Workshop on Spoken Language Technology, 2016."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"e_1_3_2_1_7_1","volume-title":"ICASSP. IEEE","author":"Zhang Yu","year":"2017","unstructured":"Yu Zhang , William Chan , and Navdeep Jaitly , \"Very deep convolutional networks for end-to-end speech recognition,\" in Proc . ICASSP. IEEE , 2017 . Yu Zhang, William Chan, and Navdeep Jaitly, \"Very deep convolutional networks for end-to-end speech recognition,\" in Proc. ICASSP. IEEE, 2017."},{"key":"e_1_3_2_1_8_1","volume-title":"ICASSP. IEEE","author":"Lu Liang","year":"2016","unstructured":"Liang Lu , Xingxing Zhang , and Steve Renais , \"On training the recurrent neural network encoder-decoder for large vocabulary end-to-end speech recognition,\" in Proc . ICASSP. IEEE , 2016 . Liang Lu, Xingxing Zhang, and Steve Renais, \"On training the recurrent neural network encoder-decoder for large vocabulary end-to-end speech recognition,\" in Proc. ICASSP. IEEE, 2016."},{"key":"e_1_3_2_1_9_1","volume-title":"Proc. Interspeech","author":"Prabhavalkar Rohit","year":"2017","unstructured":"Rohit Prabhavalkar , Kanishka Rao , Tara N Sainath , Bo Li , Leif Johnson , and Navdeep Jaitly , \" A Comparison of Sequence to Sequence Models for Speech Recognition ,\" in Proc. Interspeech , 2017 . Rohit Prabhavalkar, Kanishka Rao, Tara N Sainath, Bo Li, Leif Johnson, and Navdeep Jaitly, \"A Comparison of Sequence to Sequence Models for Speech Recognition,\" in Proc. Interspeech, 2017."},{"key":"e_1_3_2_1_10_1","volume-title":"Deep Learning","year":"2016","unstructured":"Goodfellow, Ian, Bengio, Yoshua, and Courville, Aaron. Deep Learning . MIT Press , 2016 . Goodfellow, Ian, Bengio, Yoshua, and Courville, Aaron. Deep Learning. MIT Press, 2016."},{"key":"e_1_3_2_1_11_1","unstructured":"Ng Andrew. Sequence Models (Course 5 of Deep Learning Specialization). Coursera 2018.  Ng Andrew. Sequence Models (Course 5 of Deep Learning Specialization). Coursera 2018."},{"key":"e_1_3_2_1_12_1","first-page":"4815","article-title":"Multi-accent speech recognition with hierarchical grapheme based models","author":"Rao K.","year":"2017","unstructured":"K. Rao and H. Sak , \" Multi-accent speech recognition with hierarchical grapheme based models ,\" in Proceedings of ICASSP.IEEE , 2017 , pp. 4815 -- 4819 . K. Rao and H. Sak, \"Multi-accent speech recognition with hierarchical grapheme based models,\" in Proceedings of ICASSP.IEEE, 2017, pp. 4815--4819.","journal-title":"Proceedings of ICASSP.IEEE"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18178\/ijmlc.2019.9.4.824"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.7763\/IJMLC.2011.V1.1"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18178\/ijmlc.2018.8.3.687"},{"key":"e_1_3_2_1_16_1","volume-title":"Interspeech","author":"Sak H. H.","year":"2014","unstructured":"H. H. Sak , A. Senior , and F. Beaufays . Long short-term memory recurrent neural network architectures for large scale acoustic modeling . In Interspeech , 2014 . H. H. Sak, A. Senior, and F. Beaufays. Long short-term memory recurrent neural network architectures for large scale acoustic modeling. In Interspeech, 2014."},{"key":"e_1_3_2_1_17_1","volume-title":"Interspeech","author":"Sak H.","year":"2014","unstructured":"H. Sak , O. Vinyals , G. Heigold , A. Senior , E. McDermott , R. Monga , and M. Mao . Sequence discriminative distributed training of long shortterm memory recurrent neural networks . In Interspeech , 2014 . H. Sak, O. Vinyals, G. Heigold, A. Senior, E. McDermott, R. Monga, and M. Mao. Sequence discriminative distributed training of long shortterm memory recurrent neural networks. In Interspeech, 2014."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"A. Graves and N. Jaitly. Towards end-to-end speech recognition with recurrent neural networks. In ICML 2014.  A. Graves and N. Jaitly. Towards end-to-end speech recognition with recurrent neural networks. In ICML 2014.","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"e_1_3_2_1_19_1","volume-title":"Deep speech: Scaling up end-to-end speech recognition.1412.5567","author":"Hannun A.","year":"2014","unstructured":"A. Hannun , C. Case , J. Casper , B. Catanzaro , G. Diamos , E. Elsen , R. Prenger , S. Satheesh , S. Sengupta , A. Coates , and A. Y. Ng . Deep speech: Scaling up end-to-end speech recognition.1412.5567 , 2014 . http:\/\/arxiv.org\/abs\/1412.5567. A. Hannun, C. Case, J. Casper, B. Catanzaro, G. Diamos, E. Elsen, R. Prenger, S. Satheesh, S. Sengupta,A. Coates, and A. Y. Ng. Deep speech: Scaling up end-to-end speech recognition.1412.5567, 2014. http:\/\/arxiv.org\/abs\/1412.5567."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/N15-1038"},{"key":"e_1_3_2_1_22_1","volume-title":"First-pass large vocabulary continuous speech recognition using bi-directional recurrent DNNs. abs\/1408.2873","author":"Hannun A. Y.","year":"2014","unstructured":"A. Y. Hannun , A. L. Maas , D. Jurafsky , and A. Y. Ng . First-pass large vocabulary continuous speech recognition using bi-directional recurrent DNNs. abs\/1408.2873 , 2014 .http:\/\/arxiv.org\/abs\/1408.2873. A. Y. Hannun, A. L. Maas, D. Jurafsky, and A. Y. Ng. First-pass large vocabulary continuous speech recognition using bi-directional recurrent DNNs. abs\/1408.2873, 2014.http:\/\/arxiv.org\/abs\/1408.2873."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"H. Sak A. Senior K. Rao and F. Beaufays. Fast and accurate recurrent neural network acoustic models for speech recognition. abs\/1507.06947 2015. http:\/\/arxiv.org\/abs\/1507.06947.  H. Sak A. Senior K. Rao and F. Beaufays. Fast and accurate recurrent neural network acoustic models for speech recognition. abs\/1507.06947 2015. http:\/\/arxiv.org\/abs\/1507.06947.","DOI":"10.21437\/Interspeech.2015-350"},{"key":"e_1_3_2_1_25_1","volume-title":"Leveraging native language information for improved accented speech recognition. in the Proceedings of Interspeech","author":"Ghorbani Shahram","year":"2018","unstructured":"Shahram Ghorbani , John H.L. Hansen . Leveraging native language information for improved accented speech recognition. in the Proceedings of Interspeech 2018 . Shahram Ghorbani, John H.L. Hansen. Leveraging native language information for improved accented speech recognition. in the Proceedings of Interspeech 2018."},{"key":"e_1_3_2_1_26_1","volume-title":"Fast and accurate recurrent neural network acoustic models for speech recognition,\"arXiv preprint arXiv:1507.06947","author":"Sak H.","year":"2015","unstructured":"H. Sak , A. Senior , K. Rao , and F. Beaufays , \" Fast and accurate recurrent neural network acoustic models for speech recognition,\"arXiv preprint arXiv:1507.06947 , 2015 . H. Sak, A. Senior, K. Rao, and F. Beaufays, \"Fast and accurate recurrent neural network acoustic models for speech recognition,\"arXiv preprint arXiv:1507.06947, 2015."},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of Interspeech","author":"Chen M.M.","year":"2015","unstructured":"M.M. Chen , Z.Y. Yang , J.Z. Liang , Y.P. Li , W.J. Liu , \" Improving Deep Neural Networks Based Multi-Accent Mandarin Speech Recognition Using I- Vectors and Accent Specific Top layer,\" in the Proceedings of Interspeech 2015 . M.M. Chen, Z.Y. Yang, J.Z. Liang, Y.P. Li, W.J. Liu, \"Improving Deep Neural Networks Based Multi-Accent Mandarin Speech Recognition Using I-Vectors and Accent Specific Top layer,\" in the Proceedings of Interspeech 2015."},{"key":"e_1_3_2_1_28_1","volume-title":"The kaldi speech recognition toolkit,\" in IEEE 2011 Workshop on Automatic Speech Recognition and Understanding","author":"Povey D.","year":"2011","unstructured":"D. Povey , A. Ghoshal , G. Boulianne , L. Burget , O. Glembek , N. Goel , M. Hannemann , P. Motlicek , Y. Qian , P. Schwarz , J. Silovsky , G. Stemmer , and K. Vesely , \" The kaldi speech recognition toolkit,\" in IEEE 2011 Workshop on Automatic Speech Recognition and Understanding , 2011 . D. Povey, A. Ghoshal, G. Boulianne, L. Burget, O. Glembek, N. Goel, M. Hannemann, P. Motlicek, Y. Qian, P. Schwarz, J. Silovsky, G. Stemmer, and K. Vesely, \"The kaldi speech recognition toolkit,\" in IEEE 2011 Workshop on Automatic Speech Recognition and Understanding, 2011."},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of Interspeech","author":"Jain Abhinav","year":"2018","unstructured":"Abhinav Jain , Minali Upreti , Preethi Jyothi , \" Improved Accented Speech Recognition Using Accent Embeddings and Multi-task Learning\" in the Proceedings of Interspeech 2018 . Abhinav Jain, Minali Upreti, Preethi Jyothi, \"Improved Accented Speech Recognition Using Accent Embeddings and Multi-task Learning\" in the Proceedings of Interspeech 2018."}],"event":{"name":"IVSP '20: 2020 2nd International Conference on Image, Video and Signal Processing","location":"Singapore Singapore","acronym":"IVSP '20","sponsor":["Nanyang Technological University","The Hong Kong Polytechnic The Hong Kong Polytechnic University"]},"container-title":["Proceedings of the 2020 2nd International Conference on Image, Video and Signal Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3388818.3389159","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3388818.3389159","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:31:39Z","timestamp":1750195899000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3388818.3389159"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3,20]]},"references-count":29,"alternative-id":["10.1145\/3388818.3389159","10.1145\/3388818"],"URL":"https:\/\/doi.org\/10.1145\/3388818.3389159","relation":{},"subject":[],"published":{"date-parts":[[2020,3,20]]},"assertion":[{"value":"2020-05-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}