{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:20:55Z","timestamp":1776885655686,"version":"3.51.2"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030602758","type":"print"},{"value":"9783030602765","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60276-5_27","type":"book-chapter","created":{"date-parts":[[2020,10,4]],"date-time":"2020-10-04T07:02:44Z","timestamp":1601794964000},"page":"267-278","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":52,"title":["CTC-Segmentation of Large Corpora for German End-to-End Speech Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5312-3870","authenticated-orcid":false,"given":"Ludwig","family":"K\u00fcrzinger","sequence":"first","affiliation":[]},{"given":"Dominik","family":"Winkelbauer","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0641-3178","authenticated-orcid":false,"given":"Lujun","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3552-3325","authenticated-orcid":false,"given":"Tobias","family":"Watzel","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1096-1596","authenticated-orcid":false,"given":"Gerhard","family":"Rigoll","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,29]]},"reference":[{"key":"27_CR1","unstructured":"Ardila, R., et al.: Common voice: a massively-multilingual speech corpus. arXiv preprint arXiv:1912.06670 (2019)"},{"key":"27_CR2","unstructured":"Baumann, T., K\u00f6hn, A., Hennig, F.: The spoken Wikipedia corpus collection (2016)"},{"key":"27_CR3","doi-asserted-by":"crossref","unstructured":"Chan, W., Jaitly, N., Le, Q., Vinyals, O.: Listen, attend and spell: a neural network for large vocabulary conversational speech recognition. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4960\u20134964. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"27_CR4","first-page":"170","volume":"2019","author":"P Denisov","year":"2019","unstructured":"Denisov, P., Vu, N.T.: IMS-speech: a speech to text tool. Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2019, 170\u2013177 (2019)","journal-title":"Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung"},{"key":"27_CR5","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376. ACM (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"27_CR6","unstructured":"Gutenberg, n.: Projekt gutenberg-de (2019). https:\/\/gutenberg.spiegel.de"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Karita, S., et al.: A comparative study on transformer vs RNN in speech applications. arXiv preprint arXiv:1909.06317 (2019)","DOI":"10.1109\/ASRU46091.2019.9003750"},{"key":"27_CR8","unstructured":"Lamere, P., et al.: The CMU sphinx-4 speech recognition system. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2003), Hong Kong, vol. 1, pp. 2\u20135 (2003)"},{"key":"27_CR9","unstructured":"Librivox, N.: Librivox: free public domain audiobooks (2020). https:\/\/librivox.org\/"},{"key":"27_CR10","unstructured":"Milde, B., K\u00f6hn, A.: Open source automatic speech recognition for German. In: Speech Communication; 13th ITG-Symposium, pp. 1\u20135. VDE (2018)"},{"key":"27_CR11","unstructured":"Pettarin, A.: Aeneas (2017). https:\/\/www.readbeyond.it\/aeneas\/"},{"key":"27_CR12","unstructured":"Povey, D., et al.: The kaldi speech recognition toolkit. In: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding. IEEE Signal Processing Society, December 2011. IEEE Catalog No.: CFP11SRW-USB"},{"key":"27_CR13","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"480","DOI":"10.1007\/978-3-319-24033-6_54","volume-title":"Text, Speech, and Dialogue","author":"S Radeck-Arneth","year":"2015","unstructured":"Radeck-Arneth, S., et al.: Open source German distant speech recognition: corpus and acoustic model. In: Kr\u00e1l, P., Matou\u0161ek, V. (eds.) TSD 2015. LNCS (LNAI), vol. 9302, pp. 480\u2013488. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24033-6_54"},{"key":"27_CR14","unstructured":"Rousseau, A., Del\u00e9glise, P., Esteve, Y.: Enhancing the TED-LIUM corpus with selected data for language modeling and more ted talks. In: LREC, pp. 3935\u20133939 (2014)"},{"key":"27_CR15","unstructured":"Schiel, F.: Automatic phonetic transcription of non-prompted speech. In: Proceedings of the ICPhS, pp. 607\u2013610. San Francisco, August 1999"},{"key":"27_CR16","unstructured":"Solak, I.: The M-AILABS speech dataset (2019). https:\/\/www.caito.de\/2019\/01\/the-m-ailabs-speech-dataset\/"},{"key":"27_CR17","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"27_CR18","doi-asserted-by":"publisher","unstructured":"Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T.: Hybrid CTC\/Attention architecture for end-to-end speech recognition. IEEE J. Sel. Top. Sig. Process. 11(8), 1240\u20131253, December 2017. https:\/\/doi.org\/10.1109\/JSTSP.2017.2763455","DOI":"10.1109\/JSTSP.2017.2763455"},{"key":"27_CR19","doi-asserted-by":"publisher","unstructured":"Watanabe, S., et al.: ESPnet: end-to-end speech processing toolkit. In: Interspeech, pp. 2207\u20132211 (2018). https:\/\/doi.org\/10.21437\/Interspeech.2018-1456 . http:\/\/dx.doi.org\/10.21437\/Interspeech.2018-1456","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"27_CR20","unstructured":"Young, S.J., Young, S.: The HTK hidden Markov model toolkit: design and philosophy. University of Cambridge, Department of Engineering Cambridge, England (1993)"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60276-5_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,22]],"date-time":"2022-11-22T02:42:21Z","timestamp":1669084941000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-60276-5_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030602758","9783030602765"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60276-5_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"St. Petersburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Russia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/specom.nw.ru\/2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"160","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"65","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic SPECOM 2020 was held as a virtual event","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}