{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T18:57:09Z","timestamp":1743101829417,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030602758"},{"type":"electronic","value":"9783030602765"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60276-5_22","type":"book-chapter","created":{"date-parts":[[2020,10,4]],"date-time":"2020-10-04T07:02:44Z","timestamp":1601794964000},"page":"214-222","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Experimenting with Attention Mechanisms in Joint CTC-Attention Models for Russian Speech Recognition"],"prefix":"10.1007","author":[{"given":"Irina","family":"Kipyatkova","sequence":"first","affiliation":[]},{"given":"Nikita","family":"Markovnikov","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,29]]},"reference":[{"key":"22_CR1","doi-asserted-by":"publisher","first-page":"77","DOI":"10.15622\/sp.58.4","volume":"58","author":"M Markovnikov","year":"2018","unstructured":"Markovnikov, M., Kipyatkova, I.: An analytic survey of end-to-end speech recognition systems. SPIIRAS Proc. 58, 77\u2013110 (2018)","journal-title":"SPIIRAS Proc."},{"key":"22_CR2","unstructured":"Soltau, H., Liao, H., Sak, H.: Neural speech recognizer: Acoustic-to-word LSTM model for large vocabulary speech recognition (2016). arXiv preprint arXiv:1610.09975 \nhttps:\/\/arxiv.org\/abs\/1610.09975"},{"key":"22_CR3","doi-asserted-by":"crossref","unstructured":"Liao, H., McDermott, E., Senior, A.: Large scale deep neural network acoustic modeling with semi-supervised training data for YouTube video transcription. In: Proceedings of IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 368\u2013373 (2013)","DOI":"10.1109\/ASRU.2013.6707758"},{"key":"22_CR4","unstructured":"Google preferred lineup explorer \u2013 YouTube. \nhttps:\/\/www.youtube.com\/yt\/lineups\/\n\n. Accessed 17 Feb 2018"},{"key":"22_CR5","doi-asserted-by":"crossref","unstructured":"T\u00fcske, Z., Audhkhasi, K., Saon, G.: Advancing sequence-to-sequence based speech recognition. In: INTERSPEECH-2019, pp. 3780\u20133784 (2019)","DOI":"10.21437\/Interspeech.2019-3018"},{"key":"22_CR6","doi-asserted-by":"crossref","unstructured":"Kim, S., Hori, T., Watanabe, S.: Joint CTC-attention based end-to-end speech recognition using multi-task learning. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP-2017), pp. 4835\u20134839 (2017)","DOI":"10.1109\/ICASSP.2017.7953075"},{"key":"22_CR7","doi-asserted-by":"crossref","unstructured":"Salazar, J., Kirchhoff, K., Huang, Z.: Self-attention networks for connectionist temporal classification in speech recognition. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP-2019), pp. 7115\u20137119 (2019)","DOI":"10.1109\/ICASSP.2019.8682539"},{"key":"22_CR8","unstructured":"Chiu, C.C., Raffel, C.: Monotonic chunkwise attention (2017). arXiv preprint \narXiv:1712.05382"},{"key":"22_CR9","doi-asserted-by":"crossref","unstructured":"Miao, H., et al.: Online hybrid CTC\/Attention architecture for end-to-end speech recognition. In: INTERSPEECH-2019, pp. 2623\u20132627 (2019)","DOI":"10.21437\/Interspeech.2019-2018"},{"key":"22_CR10","doi-asserted-by":"crossref","unstructured":"Watanabe, S., et al.: ESPnet: end-to-end speech processing toolkit. In: INTERSPEECH-2018, pp. 2207\u20132211 (2018)","DOI":"10.21437\/Interspeech.2018-1456"},{"issue":"1","key":"22_CR11","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., et al.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15(1), 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"22_CR13","unstructured":"Chorowski, J.K., et al.: Attention-based models for speech recognition. In: Advances in Neural Information Processing Systems, pp. 577\u2013585 (2015)"},{"key":"22_CR14","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition(2014). arXiv preprint \narXiv:1409.1556"},{"key":"22_CR15","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift (2015). CoRR abs\/1502.03167 \nhttp:\/\/arxiv.org\/abs\/1502.03167"},{"key":"22_CR16","unstructured":"Glorot, X., Bordes, A., Bengio, Y.: Deep sparse rectifier neural networks. In: Proceedings of the 14th International Conference on Artificial Intelligence and Statistics, pp. 315\u2013323 (2011)"},{"key":"22_CR17","unstructured":"Tu, Z., et al.: Modeling coverage for neural machine translation (2016). arXiv preprint \narXiv:1601.04811"},{"key":"22_CR18","unstructured":"See, A., Liu, P.J., Manning, C.D.: Get to the point: Summarization with pointer-generator networks (2017). arXiv preprint \narXiv:1704.04368"},{"key":"22_CR19","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"362","DOI":"10.1007\/978-3-319-66429-3_35","volume-title":"Speech and Computer","author":"I Kipyatkova","year":"2017","unstructured":"Kipyatkova, I.: Experimenting with hybrid TDNN\/HMM acoustic models for Russian speech recognition. In: Karpov, A., Potapova, R., Mporas, I. (eds.) SPECOM 2017. LNCS (LNAI), vol. 10458, pp. 362\u2013369. Springer, Cham (2017). \nhttps:\/\/doi.org\/10.1007\/978-3-319-66429-3_35"},{"key":"22_CR20","unstructured":"Kipyatkova, I., Karpov, A.: Class-based LSTM Russian language model with linguistic information. In: Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020), pp. 2470\u20132474 (2020)"},{"key":"22_CR21","first-page":"515","volume":"2009","author":"O Jokisch","year":"2009","unstructured":"Jokisch, O., Wagner, A., Sabo, R., Jaeckel, R., Cylwik, N., Rusko, M., Ronzhin, A., Hoffmann, R.: Multilingual speech data collection for the assessment of pronunciation and prosody in a language learning system. Proceedings of SPECOM 2009, 515\u2013520 (2009)","journal-title":"Proceedings of SPECOM"},{"key":"22_CR22","unstructured":"State Standard P 50840\u201395. Speech Transmission by Communication Paths. Evaluation Methods of Quality, Intelligibility and Recognizability, p. 230. Standartov Publication, Moscow (1996). (in Russian)"},{"key":"22_CR23","unstructured":"Stepanova, S.B.: Phonetic features of Russian speech: realization and transcription, Ph.D. thesis (1988). (in Russian)"},{"key":"22_CR24","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1007\/978-3-319-43958-7_40","volume-title":"Speech and Computer","author":"V Verkhodanova","year":"2016","unstructured":"Verkhodanova, V., Ronzhin, A., Kipyatkova, I., Ivanko, D., Karpov, A., \u017delezn\u00fd, M.: HAVRUS corpus: high-speed recordings of audio-visual Russian speech. In: Ronzhin, A., Potapova, R., N\u00e9meth, G. (eds.) SPECOM 2016. LNCS (LNAI), vol. 9811, pp. 338\u2013345. Springer, Cham (2016). \nhttps:\/\/doi.org\/10.1007\/978-3-319-43958-7_40"},{"issue":"3","key":"22_CR25","doi-asserted-by":"publisher","first-page":"546","DOI":"10.1134\/S1054661809030225","volume":"19","author":"AA Karpov","year":"2009","unstructured":"Karpov, A.A., Ronzhin, A.L.: Information enquiry kiosk with multimodal user interface. Pattern Recogn. Image Anal. 19(3), 546\u2013558 (2009)","journal-title":"Pattern Recogn. Image Anal."},{"key":"22_CR26","unstructured":"Freitag, M., Al-Onaizan, Y.: Beam search strategies for neural machine translation (2017). arXiv preprint \narXiv:1702.01806"},{"key":"22_CR27","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1007\/978-3-030-26061-3_35","volume-title":"Speech and Computer","author":"N Markovnikov","year":"2019","unstructured":"Markovnikov, N., Kipyatkova, I.: Investigating joint CTC-attention models for end-to-end Russian speech recognition. In: Salah, A.A., Karpov, A., Potapova, R. (eds.) SPECOM 2019. LNCS (LNAI), vol. 11658, pp. 337\u2013347. Springer, Cham (2019). \nhttps:\/\/doi.org\/10.1007\/978-3-030-26061-3_35"},{"key":"22_CR28","unstructured":"Jang, E., Gu, S., Poole, B.: Categorical reparameterization with gumbel-softmax (2016). arXiv preprint \narXiv:1611.01144"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60276-5_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,4]],"date-time":"2020-10-04T07:07:48Z","timestamp":1601795268000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-60276-5_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030602758","9783030602765"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60276-5_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"St. Petersburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Russia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/specom.nw.ru\/2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"160","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"65","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic SPECOM 2020 was held as a virtual event","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}