{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:39:14Z","timestamp":1742913554704,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030878016"},{"type":"electronic","value":"9783030878023"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-87802-3_30","type":"book-chapter","created":{"date-parts":[[2021,9,21]],"date-time":"2021-09-21T23:36:52Z","timestamp":1632267412000},"page":"327-335","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["End-to-End Russian Speech Recognition Models with Multi-head Attention"],"prefix":"10.1007","author":[{"given":"Irina","family":"Kipyatkova","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,9,22]]},"reference":[{"key":"30_CR1","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. In: Advances in Neural Information Processing Systems. pp. 3104\u20133112 (2014)"},{"key":"30_CR2","unstructured":"Vaswani, A. et al.: Attention is all you need. arXiv preprint arXiv:1706.03762 (2017). https:\/\/arxiv.org\/abs\/1706.03762"},{"key":"30_CR3","doi-asserted-by":"publisher","first-page":"77","DOI":"10.15622\/sp.58.4","volume":"58","author":"M Markovnikov","year":"2018","unstructured":"Markovnikov, M., Kipyatkova, I.: An analytic survey of end-to-end speech recognition systems. SPIIRAS Proc. 58, 77\u2013110 (2018)","journal-title":"SPIIRAS Proc."},{"key":"30_CR4","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning. pp. 369\u2013376 (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"30_CR5","doi-asserted-by":"crossref","unstructured":"Kim, S., Hori, T., Watanabe, S: Joint ctc-attention based end-to-end speech recognition using multi-task learning. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP-2017), pp. 4835\u20134839 (2017)","DOI":"10.1109\/ICASSP.2017.7953075"},{"key":"30_CR6","doi-asserted-by":"crossref","unstructured":"Salazar, J., Kirchhoff, K., Huang, Z.: Self-attention networks for connectionist temporal classification in speech recognition. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP-2019), pp. 7115\u20137119 (2019)","DOI":"10.1109\/ICASSP.2019.8682539"},{"key":"30_CR7","doi-asserted-by":"crossref","unstructured":"Meng, Z., Gaur, Y., Li, J., Gong, Y.: Character-aware attention-based end-to-end speech recognition. In: Proceedings of IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 949\u2013955 (2019)","DOI":"10.1109\/ASRU46091.2019.9004018"},{"key":"30_CR8","doi-asserted-by":"crossref","unstructured":"Moritz, N., Hori, T., Le Roux, J.: Triggered attention for end-to-end speech recognition. In: Proceedings of International Conference on Acoustics, Speech and Signal Processing (ICASSP-2019), pp. 5666\u20135670 (2019)","DOI":"10.1109\/ICASSP.2019.8683510"},{"key":"30_CR9","unstructured":"Raffel, C., Luong, M.-T., Liu, P.J., Weiss, R.J., Eck, D.: Online and linear-time attention by enforcing monotonic alignments. In: Proceedings of International Conference on Machine Learning, pp. 2837\u20132846 (2017)"},{"key":"30_CR10","doi-asserted-by":"crossref","unstructured":"Hayashi, T., et al.: Multi-head decoder for end-to-end speech recognition. arXiv preprint arXiv:1804.08050 (2018). https:\/\/arxiv.org\/abs\/1804.08050","DOI":"10.21437\/Interspeech.2018-1655"},{"issue":"1","key":"30_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13636-018-0144-6","volume":"2019","author":"C-X Qin","year":"2019","unstructured":"Qin, C.-X., Zhang, W.-L., Qu, D.: A new joint CTC-attention-based speech recognition model with multi-level multi-head attention. EURASIP J. Audio, Speech, Music Proc. 2019(1), 1\u201312 (2019)","journal-title":"EURASIP J. Audio, Speech, Music Proc."},{"key":"30_CR12","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1007\/978-3-030-60276-5_22","volume-title":"Speech and Computer","author":"I Kipyatkova","year":"2020","unstructured":"Kipyatkova, I., Markovnikov, N.: Experimenting with attention mechanisms in joint CTC-attention models for Russian speech recognition. In: Karpov, A., Potapova, R. (eds.) SPECOM 2020. LNCS (LNAI), vol. 12335, pp. 214\u2013222. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-60276-5_22"},{"key":"30_CR13","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. ArXiv preprint arXiv:1409.1556 (2014). https:\/\/arxiv.org\/abs\/1409.1556"},{"key":"30_CR14","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"30_CR15","unstructured":"Srivastava, R.K., Greff, K., Schmidhuber, J.: Highway networks. arXiv preprint arXiv:1505.00387 (2015). https:\/\/arxiv.org\/abs\/1505.00387"},{"issue":"11","key":"30_CR16","first-page":"1027","volume":"63","author":"IS Kipyatkova","year":"2020","unstructured":"Kipyatkova, I.S., Karpov, A.A.: A comparative study of neural network architectures for integrated speech recognition system. J. Instrum. Eng. 63(11), 1027\u20131033 (2020). (In Russian)","journal-title":"J. Instrum. Eng."},{"key":"30_CR17","doi-asserted-by":"crossref","unstructured":"Panayotov, V. et al.: Librispeech: an ASR corpus based on public domain audio books. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP-2015), pp.\u00a05206\u20135210 (2015)","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"30_CR18","unstructured":"Kipyatkova, I., Karpov, A.: Class-based LSTM Russian language model with linguistic information. In: Proceedings 12th International Conference on Language Resources and Evaluation LREC-2020, ELRA, Marseille, France, pp.\u00a02470\u20132474 (2020)"},{"key":"30_CR19","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1007\/978-3-319-01931-4_29","volume-title":"Speech and Computer","author":"I Kipyatkova","year":"2013","unstructured":"Kipyatkova, I., Karpov, A.: Lexicon size and language model order optimization for Russian LVCSR. In: \u017delezn\u00fd, M., Habernal, I., Ronzhin, A. (eds.) SPECOM 2013. LNCS (LNAI), vol. 8113, pp. 219\u2013226. Springer, Cham (2013). https:\/\/doi.org\/10.1007\/978-3-319-01931-4_29"},{"key":"30_CR20","doi-asserted-by":"crossref","unstructured":"Watanabe, S. et al.: Espnet: end-to-end speech processing toolkit. In: INTERSPEECH-2018, pp. 2207\u20132211 (2018)","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"30_CR21","unstructured":"Karmakar, P., Teng, S.W., Lu, G.: Thank you for attention: a survey on attention-based artificial neural networks for automatic speech recognition. arXiv preprint arXiv:2102.07259 (2021). https:\/\/arxiv.org\/abs\/2102.07259"},{"key":"30_CR22","unstructured":"Chorowski, J.K., Bahdanau, D., Serdyuk, D., Cho, K., Bengio, Y.: Attention-based models for speech recognition. In: Advances in Neural Information Processing Systems, pp. 577\u2013585 (2015)"},{"key":"30_CR23","doi-asserted-by":"crossref","unstructured":"Freitag, M., Al-Onaizan, Y.: Beam search strategies for neural machine translation. ArXiv preprint arXiv:1702.01806 (2017). https:\/\/arxiv.org\/abs\/1702.01806","DOI":"10.18653\/v1\/W17-3207"},{"key":"30_CR24","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1007\/978-3-030-26061-3_35","volume-title":"Speech and Computer","author":"N Markovnikov","year":"2019","unstructured":"Markovnikov, N., Kipyatkova, I.: Investigating joint CTC-attention models for end-to-end Russian speech recognition. In: Salah, A.A., Karpov, A., Potapova, R. (eds.) SPECOM 2019. LNCS (LNAI), vol. 11658, pp. 337\u2013347. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-26061-3_35"},{"key":"30_CR25","unstructured":"Jang, E., Gu, S., Poole, B.: Categorical reparameterization with gumbel softMax. ArXiv preprint arXiv:1611.01144 (2016). https:\/\/arxiv.org\/abs\/1611.01144"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-87802-3_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,9,21]],"date-time":"2021-09-21T23:46:50Z","timestamp":1632268010000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-87802-3_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030878016","9783030878023"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-87802-3_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"22 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"St Petersburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Russia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/specom.nw.ru\/2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"163","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"74","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"45% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.5","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held online due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}