{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T07:11:46Z","timestamp":1742973106600,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":28,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819755936"},{"type":"electronic","value":"9789819755943"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-5594-3_30","type":"book-chapter","created":{"date-parts":[[2024,8,13]],"date-time":"2024-08-13T15:06:26Z","timestamp":1723561586000},"page":"354-364","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Context-Sensitive Adapter: Contextual Biasing for Personalized End-to-End Speech Recognition with Attention Fusion and Bias Filtering"],"prefix":"10.1007","author":[{"given":"Yineng","family":"Cai","sequence":"first","affiliation":[]},{"given":"Lixu","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Yongchao","family":"Li","sequence":"additional","affiliation":[]},{"given":"Nurmemet","family":"Yolwas","sequence":"additional","affiliation":[]},{"given":"Wushouer","family":"Silamu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,14]]},"reference":[{"doi-asserted-by":"publisher","unstructured":"Bahdanau, D., Chorowski, J., Serdyuk, D., Brakel, P., Bengio, Y.: End- to-End attention-based large vocabulary speech recognition (2016). https:\/\/doi.org\/10.48550\/arXiv.1508.04395","key":"30_CR1","DOI":"10.48550\/arXiv.1508.04395"},{"doi-asserted-by":"publisher","unstructured":"Chan, W., Jaitly, N., Le, Q., Vinyals, O.: Listen, attend and spell: a neural network for large vocabulary conversational speech recognition. In: 2016 IEEE International Conference on Acoustics Speech and Signal Processing (ICASSP), pp. 4960\u20134964 (2016). https:\/\/doi.org\/10.1109\/ICASSP.2016.7472621","key":"30_CR2","DOI":"10.1109\/ICASSP.2016.7472621"},{"doi-asserted-by":"publisher","unstructured":"Chang, F.J.: Context-aware transformer transducer for speech recognition. In: 2021 IEEE automatic speech recognition and understanding workshop (ASRU), pp. 503\u2013510 (2021). https:\/\/doi.org\/10.1109\/ASRU51503.2021.9687895","key":"30_CR3","DOI":"10.1109\/ASRU51503.2021.9687895"},{"doi-asserted-by":"publisher","unstructured":"Chorowski, J., Bahdanau, D., Serdyuk, D., Cho, K., Bengio, Y.: Attention-Based Models for Speech Recognition (2015). https:\/\/doi.org\/10.48550\/arXiv.1506.07503","key":"30_CR4","DOI":"10.48550\/arXiv.1506.07503"},{"doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M. W., Lee, K., Toutanova, K.: BERT: Pre-training of deep bidirectional transformers for language understanding. In: Burstein, J., Doran, C., Solorio, T. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2\u20137, 2019, Vol. 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/V1\/N19-1423","key":"30_CR5","DOI":"10.18653\/V1\/N19-1423"},{"doi-asserted-by":"publisher","unstructured":"Dong, L., Xu, S., Xu, B.: Speech-Transformer: a No-recurrence sequence-to-sequence model for speech recognition. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5884\u20135888 (2018). https:\/\/doi.org\/10.1109\/ICASSP.2018.8462506","key":"30_CR6","DOI":"10.1109\/ICASSP.2018.8462506"},{"doi-asserted-by":"publisher","unstructured":"Fu, X., et al.: Robust acoustic and semantic contextual biasing in neural transducers for speech recognition. In: ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). pp. 1\u20135 (2023). https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10094808","key":"30_CR7","DOI":"10.1109\/ICASSP49357.2023.10094808"},{"doi-asserted-by":"publisher","unstructured":"Gourav, A., et al.: personalization strategies for End-to-End speech recognition systems. In: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7348\u20137352 (2021). https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9413962","key":"30_CR8","DOI":"10.1109\/ICASSP39728.2021.9413962"},{"doi-asserted-by":"publisher","unstructured":"Graves, A.: Sequence Transduction with Recurrent Neural Networks (2012). https:\/\/doi.org\/10.48550\/arXiv.1211.3711","key":"30_CR9","DOI":"10.48550\/arXiv.1211.3711"},{"doi-asserted-by":"publisher","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: Labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376. ICML \u201806, Association for Computing Machinery, New York, NY, USA (2006). https:\/\/doi.org\/10.1145\/1143844.1143891","key":"30_CR10","DOI":"10.1145\/1143844.1143891"},{"unstructured":"Graves, A., Jaitly, N.: Towards End-To-End speech recognition with recurrent neural networks. In: Proceedings of the 31st International Conference on Machine Learning, pp. 1764\u20131772. PMLR (2014)","key":"30_CR11"},{"unstructured":"Ha, D., Dai, A.M., Le, Q.V.: HyperNetworks. In: International Conference on Learning Representations (2016)","key":"30_CR12"},{"doi-asserted-by":"publisher","unstructured":"He, Y., et al.: Streaming End-to-end speech recognition for mobile devices. In: ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6381\u20136385 (2019). https:\/\/doi.org\/10.1109\/ICASSP.2019.8682336","key":"30_CR13","DOI":"10.1109\/ICASSP.2019.8682336"},{"doi-asserted-by":"publisher","unstructured":"Jain, M., Keren, G., Mahadeokar, J., Zweig, G., Metze, F., Saraf, Y.: Contextual RNN-T for Open Domain ASR (2020). https:\/\/doi.org\/10.48550\/arXiv.2006.03411","key":"30_CR14","DOI":"10.48550\/arXiv.2006.03411"},{"doi-asserted-by":"publisher","unstructured":"Li, B., et al.: Towards fast and accurate streaming End-To-End ASR. In: ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6069\u20136073 (2020). https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9054715","key":"30_CR15","DOI":"10.1109\/ICASSP40776.2020.9054715"},{"doi-asserted-by":"publisher","unstructured":"Li, J.: Recent Advances in End-to-End Automatic Speech Recognition (2022). https:\/\/doi.org\/10.48550\/arXiv.2111.01690","key":"30_CR16","DOI":"10.48550\/arXiv.2111.01690"},{"doi-asserted-by":"publisher","unstructured":"Mai, F., Zuluaga-Gomez, J., Parcollet, T., Motlicek, P.: HyperCon-former: Multi-head HyperMixer for Efficient Speech Recognition (2023). https:\/\/doi.org\/10.48550\/arXiv.2305.18281","key":"30_CR17","DOI":"10.48550\/arXiv.2305.18281"},{"doi-asserted-by":"publisher","unstructured":"Munkhdalai, T., et al.: fast contextual adaptation with neural associative memory for on-device personalized speech recognition. In: ICASSP 2022 - 2022 IEEE Inter- national Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6632\u20136636 (2022). https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747726","key":"30_CR18","DOI":"10.1109\/ICASSP43922.2022.9747726"},{"doi-asserted-by":"publisher","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: Librispeech: An ASR corpus based on public domain audio books. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5206\u20135210 (2015). https:\/\/doi.org\/10.1109\/ICASSP.2015.7178964","key":"30_CR19","DOI":"10.1109\/ICASSP.2015.7178964"},{"doi-asserted-by":"publisher","unstructured":"Pundak, G., Sainath, T. N., Prabhavalkar, R., Kannan, A., Zhao, D.: Deep Context: End-to-end Contextual Speech Recognition. In: 2018 IEEE Spoken Language Technology Workshop (SLT), pp. 418\u2013425 (2018). https:\/\/doi.org\/10.1109\/SLT.2018.8639034","key":"30_CR20","DOI":"10.1109\/SLT.2018.8639034"},{"doi-asserted-by":"publisher","unstructured":"Ravanelli, M.: A general-purpose speech Toolkit (2021). https:\/\/doi.org\/10.48550\/arXiv.2106.04624","key":"30_CR21","DOI":"10.48550\/arXiv.2106.04624"},{"doi-asserted-by":"publisher","unstructured":"Sathyendra, K.M.: Contextual Adapters for Personalized Speech Recognition in Neural Transducers. In: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8537\u20138541 (2022). https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9746126","key":"30_CR22","DOI":"10.1109\/ICASSP43922.2022.9746126"},{"doi-asserted-by":"publisher","unstructured":"Sun, G., Zhang, C., Woodland, P.C.: Tree-constrained pointer generator for end-to-end contextual speech recognition. In: 2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 780\u2013787 (2021). https:\/\/doi.org\/10.1109\/ASRU51503.2021.9687915","key":"30_CR23","DOI":"10.1109\/ASRU51503.2021.9687915"},{"key":"30_CR24","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1109\/TASLP.2022.3224286","volume":"31","author":"G Sun","year":"2023","unstructured":"Sun, G., Zhang, C., Woodland, P.C.: Minimising biasing word errors for contextual ASR with the tree-constrained pointer generator. IEEE-ACM Trans. Audio Speech Lang. Process. 31, 345\u2013354 (2023). https:\/\/doi.org\/10.1109\/TASLP.2022.3224286","journal-title":"IEEE-ACM Trans. Audio Speech Lang. Process."},{"doi-asserted-by":"publisher","unstructured":"Tripathi, A., Lu, H., Sak, H., Soltau, H.: monotonic recurrent neural network transducer and decoding strategies. In: 2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 944\u2013948 (2019). https:\/\/doi.org\/10.1109\/ASRU46091.2019.9003822","key":"30_CR25","DOI":"10.1109\/ASRU46091.2019.9003822"},{"unstructured":"Vaswani, A., et al.: Attention is all you need. In: Adv. Neural Inf. Process. Syst. vol. 30. Curran Associates, Inc. (2017)","key":"30_CR26"},{"doi-asserted-by":"publisher","unstructured":"Xu, T., et al.: Adaptive contextual biasing for transducer based streaming speech recognition (2023) https:\/\/doi.org\/10.48550\/arXiv.2306.00804","key":"30_CR27","DOI":"10.48550\/arXiv.2306.00804"},{"doi-asserted-by":"publisher","unstructured":"Zhao, D., et al.: Shallow-Fusion End-to-End Contextual Biasing. In: Proc. Interspeech 2019, pp. 1418\u20131422 (2019).https:\/\/doi.org\/10.21437\/Interspeech.2019-12","key":"30_CR28","DOI":"10.21437\/Interspeech.2019-12"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-5594-3_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,13]],"date-time":"2024-08-13T15:12:22Z","timestamp":1723561942000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-5594-3_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819755936","9789819755943"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-5594-3_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"14 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/2024\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}