{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:58:48Z","timestamp":1740099528153,"version":"3.37.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030260606"},{"type":"electronic","value":"9783030260613"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-26061-3_35","type":"book-chapter","created":{"date-parts":[[2019,8,8]],"date-time":"2019-08-08T19:03:54Z","timestamp":1565291034000},"page":"337-347","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Investigating Joint CTC-Attention Models for End-to-End Russian Speech Recognition"],"prefix":"10.1007","author":[{"given":"Nikita","family":"Markovnikov","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Irina","family":"Kipyatkova","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,7,24]]},"reference":[{"key":"35_CR1","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv preprint \n                    arXiv:1409.0473\n                    \n                   (2014)"},{"key":"35_CR2","doi-asserted-by":"crossref","unstructured":"Bahdanau, D., et al.: End-to-end attention-based large vocabulary speech recognition. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4945\u20134949 (2016)","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"35_CR3","doi-asserted-by":"publisher","unstructured":"Besacier, L., Barnard, E., Karpov, A., Schultz, T.: Automatic speech recognition for under-resourced languages: a survey. Speech Commun. 56, 85\u2013100 (2014). \n                    https:\/\/doi.org\/10.1016\/j.specom.2013.07.008","DOI":"10.1016\/j.specom.2013.07.008"},{"key":"35_CR4","doi-asserted-by":"crossref","unstructured":"Chan, W., Jaitly, N., Le, Q., Vinyals, O.: Listen, attend and spell: a neural network for large vocabulary conversational speech recognition. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4960\u20134964. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"35_CR5","unstructured":"Cho, K., van Merrienboer, B., G\u00fcl\u00e7ehre, \u00c7., Bougares, F., Schwenk, H., Bengio, Y.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. CoRR abs\/1406.1078 (2014). \n                    http:\/\/arxiv.org\/abs\/1406.1078"},{"key":"35_CR6","unstructured":"Chorowski, J.K., Bahdanau, D., Serdyuk, D., Cho, K., Bengio, Y.: Attention-based models for speech recognition. In: Advances in Neural Information Processing Systems, pp. 577\u2013585 (2015)"},{"key":"35_CR7","doi-asserted-by":"crossref","unstructured":"Freitag, M., Al-Onaizan, Y.: Beam search strategies for neural machine translation. arXiv preprint \n                    arXiv:1702.01806\n                    \n                   (2017)","DOI":"10.18653\/v1\/W17-3207"},{"key":"35_CR8","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376. ACM (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"35_CR9","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. CoRR abs\/1502.03167 (2015). \n                    http:\/\/arxiv.org\/abs\/1502.03167"},{"key":"35_CR10","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1016\/j.specom.2013.07.004","volume":"56","author":"A Karpov","year":"2014","unstructured":"Karpov, A., Markov, K., Kipyatkova, I., Vazhenina, D., Ronzhin, A.: Large vocabulary Russian speech recognition using syntactico-statistical language modeling. Speech Commun. 56, 213\u2013228 (2014). \n                    https:\/\/doi.org\/10.1016\/j.specom.2013.07.004","journal-title":"Speech Commun."},{"key":"35_CR11","unstructured":"Kim, S., Hori, T., Watanabe, S.: Joint CTC-attention based end-to-end speech recognition using multi-task learning. CoRR abs\/1609.06773 (2016). \n                    http:\/\/arxiv.org\/abs\/1609.06773"},{"key":"35_CR12","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1007\/978-3-319-01931-4_29","volume-title":"Speech and Computer","author":"I Kipyatkova","year":"2013","unstructured":"Kipyatkova, I., Karpov, A.: Lexicon size and language model order optimization for Russian LVCSR. In: \u017delezn\u00fd, M., Habernal, I., Ronzhin, A. (eds.) SPECOM 2013. LNCS (LNAI), vol. 8113, pp. 219\u2013226. Springer, Cham (2013). \n                    https:\/\/doi.org\/10.1007\/978-3-319-01931-4_29"},{"key":"35_CR13","unstructured":"Kurata, G., Ramabhadran, B., Saon, G., Sethy, A.: Language modeling with highway LSTM. CoRR abs\/1709.06436 (2017). \n                    http:\/\/arxiv.org\/abs\/1709.06436"},{"key":"35_CR14","unstructured":"Martins, A.F.T., Astudillo, R.F.: From softmax to sparsemax: a sparse model of attention and multi-label classification. CoRR abs\/1602.02068 (2016). \n                    http:\/\/arxiv.org\/abs\/1602.02068"},{"key":"35_CR15","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: Understanding the exploding gradient problem. CoRR abs\/1211.5063 (2012). \n                    http:\/\/arxiv.org\/abs\/1211.5063"},{"issue":"1","key":"35_CR16","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G.E., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15(1), 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"35_CR17","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. In: Advances in Neural Information Processing Systems, pp. 3104\u20133112 (2014)"},{"key":"35_CR18","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"35_CR19","doi-asserted-by":"crossref","unstructured":"Verhelst, W., Roelands, M.: An overlap-add technique based on waveform similarity (WSOLA) for high quality time-scale modification of speech. In: 1993 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP-1993, vol. 2, pp. 554\u2013557. IEEE (1993)","DOI":"10.1109\/ICASSP.1993.319366"},{"key":"35_CR20","doi-asserted-by":"crossref","unstructured":"Watanabe, S., et al.: ESPnet: end-to-end speech processing toolkit. In: Interspeech, pp. 2207\u20132211 (2018). \n                    http:\/\/dx.doi.org\/10.21437\/Interspeech.2018-1456","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"35_CR21","unstructured":"Zeiler, M.D.: ADADELTA: an adaptive learning rate method. arXiv preprint \n                    arXiv:1212.5701\n                    \n                   (2012)"},{"key":"35_CR22","unstructured":"Zeyer, A., Irie, K., Schl\u00fcter, R., Ney, H.: Improved training of end-to-end attention models for speech recognition. CoRR abs\/1805.03294 (2018). \n                    http:\/\/arxiv.org\/abs\/1805.03294"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-26061-3_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,8]],"date-time":"2019-08-08T19:08:40Z","timestamp":1565291320000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-26061-3_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030260606","9783030260613"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-26061-3_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"24 July 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Istanbul","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turkey","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 August 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 August 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/specom.nw.ru\/2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"86","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"57","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"66% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}