{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T05:17:09Z","timestamp":1742966229422,"version":"3.40.3"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030602758"},{"type":"electronic","value":"9783030602765"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60276-5_62","type":"book-chapter","created":{"date-parts":[[2020,10,4]],"date-time":"2020-10-04T07:02:44Z","timestamp":1601794964000},"page":"646-656","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Synchronized Forward-Backward Transformer for End-to-End Speech Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3552-3325","authenticated-orcid":false,"given":"Tobias","family":"Watzel","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5312-3870","authenticated-orcid":false,"given":"Ludwig","family":"K\u00fcrzinger","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0641-3178","authenticated-orcid":false,"given":"Lujun","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1096-1596","authenticated-orcid":false,"given":"Gerhard","family":"Rigoll","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,29]]},"reference":[{"key":"62_CR1","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)"},{"key":"62_CR2","unstructured":"Cer, D., et al.: Universal sentence encoder. arXiv preprint arXiv:1803.11175 (2018)"},{"key":"62_CR3","doi-asserted-by":"crossref","unstructured":"Chan, W., Jaitly, N., Le, Q., Vinyals, O.: Listen, attend and spell: a neural network for large vocabulary conversational speech recognition. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4960\u20134964. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"62_CR4","doi-asserted-by":"crossref","unstructured":"Chiu, C.C., et al.: State-of-the-art speech recognition with sequence-to-sequence models. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4774\u20134778. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"62_CR5","unstructured":"Chorowski, J., Bahdanau, D., Cho, K., Bengio, Y.: End-to-end continuous speech recognition using attention-based recurrent nn: first results. arXiv preprint arXiv:1412.1602 (2014)"},{"key":"62_CR6","unstructured":"Cuturi, M., Blondel, M.: Soft-DTW: a differentiable loss function for time-series. In: Proceedings of the 34th International Conference on Machine Learning-Volume 70, pp. 894\u2013903. JMLR.org (2017)"},{"key":"62_CR7","doi-asserted-by":"crossref","unstructured":"Graves, A.: Sequence transduction with recurrent neural networks. arXiv preprint arXiv:1211.3711 (2012)","DOI":"10.1007\/978-3-642-24797-2_3"},{"key":"62_CR8","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376. ACM (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"62_CR9","unstructured":"Graves, A., Jaitly, N.: Towards end-to-end speech recognition with recurrent neural networks. In: International Conference on Machine Learning, pp. 1764\u20131772 (2014)"},{"key":"62_CR10","doi-asserted-by":"crossref","unstructured":"Graves, A., Mohamed, A.r., Hinton, G.: Speech recognition with deep recurrent neural networks. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 6645\u20136649. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"62_CR11","unstructured":"Han, Y.S., Yoo, J., Ye, J.C.: Deep residual learning for compressed sensing ct reconstruction via persistent homology analysis. arXiv preprint arXiv:1611.06391 (2016)"},{"key":"62_CR12","doi-asserted-by":"crossref","unstructured":"Hori, T., Watanabe, S., Zhang, Y., Chan, W.: Advances in joint ctc-attention based end-to-end speech recognition with a deep cnn encoder and rnn-lm. arXiv preprint arXiv:1706.02737 (2017)","DOI":"10.21437\/Interspeech.2017-1296"},{"key":"62_CR13","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"62_CR14","doi-asserted-by":"crossref","unstructured":"Lu, L., Zhang, X., Renais, S.: On training the recurrent neural network encoder-decoder for large vocabulary end-to-end speech recognition. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5060\u20135064. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472641"},{"key":"62_CR15","doi-asserted-by":"crossref","unstructured":"Luong, M.T., Pham, H., Manning, C.D.: Effective approaches to attention-based neural machine translation. arXiv preprint arXiv:1508.04025 (2015)","DOI":"10.18653\/v1\/D15-1166"},{"key":"62_CR16","doi-asserted-by":"crossref","unstructured":"Mimura, M., Sakai, S., Kawahara, T.: Forward-backward attention decoder. In: Interspeech, pp. 2232\u20132236 (2018)","DOI":"10.21437\/Interspeech.2018-1160"},{"key":"62_CR17","unstructured":"Povey, D., et al.: The kaldi speech recognition toolkit. In: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding. No. CONF, IEEE Signal Processing Society (2011)"},{"key":"62_CR18","unstructured":"Rousseau, A., Del\u00e9glise, P., Esteve, Y.: Enhancing the TED-LIUM corpus with selected data for language modeling and more TED talks. In: LREC, pp. 3935\u20133939 (2014)"},{"key":"62_CR19","doi-asserted-by":"crossref","unstructured":"Sak, H., Shannon, M., Rao, K., Beaufays, F.: Recurrent neural aligner: an encoder-decoder neural network model for sequence to sequence mapping. In: Interspeech, pp. 1298\u20131302 (2017)","DOI":"10.21437\/Interspeech.2017-1705"},{"key":"62_CR20","doi-asserted-by":"crossref","unstructured":"Salazar, J., Kirchhoff, K., Huang, Z.: Self-attention networks for connectionist temporal classification in speech recognition. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7115\u20137119. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8682539"},{"key":"62_CR21","doi-asserted-by":"crossref","unstructured":"Sperber, M., Niehues, J., Neubig, G., St\u00fcker, S., Waibel, A.: Self-attentional acoustic models. arXiv preprint arXiv:1803.09519 (2018)","DOI":"10.21437\/Interspeech.2018-1910"},{"issue":"1","key":"62_CR22","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15(1), 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"62_CR23","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"62_CR24","doi-asserted-by":"publisher","first-page":"4395","DOI":"10.21437\/Interspeech.2019-2203","volume":"2019","author":"Z Tian","year":"2019","unstructured":"Tian, Z., Yi, J., Tao, J., Bai, Y., Wen, Z.: Self-attention transducers for end-to-end speech recognition. Proc. Interspeech 2019, 4395\u20134399 (2019)","journal-title":"Proc. Interspeech"},{"key":"62_CR25","doi-asserted-by":"publisher","first-page":"3780","DOI":"10.21437\/Interspeech.2019-3018","volume":"2019","author":"Z T\u00fcske","year":"2019","unstructured":"T\u00fcske, Z., Audhkhasi, K., Saon, G.: Advancing sequence-to-sequence based speech recognition. Proc. Interspeech 2019, 3780\u20133784 (2019)","journal-title":"Proc. Interspeech"},{"key":"62_CR26","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"62_CR27","doi-asserted-by":"crossref","unstructured":"Watanabe, S., et al.: Espnet: end-to-end speech processing toolkit. arXiv preprint arXiv:1804.00015 (2018)","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"62_CR28","doi-asserted-by":"crossref","unstructured":"Watzel, T., K\u00fcrzinger, L., Li, L., Rigoll, G.: Regularized forward-backward decoder for attention models. arXiv preprint arXiv:2006.08506 (2020)","DOI":"10.1007\/978-3-030-87802-3_70"},{"key":"62_CR29","doi-asserted-by":"crossref","unstructured":"Zheng, Y., et al.: Forward-backward decoding for regularizing end-to-end TTS. arXiv preprint arXiv:1907.09006 (2019)","DOI":"10.21437\/Interspeech.2019-2325"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60276-5_62","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,22]],"date-time":"2022-11-22T02:44:59Z","timestamp":1669085099000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-60276-5_62"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030602758","9783030602765"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60276-5_62","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"St. Petersburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Russia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/specom.nw.ru\/2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"160","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"65","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic SPECOM 2020 was held as a virtual event","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}