{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T18:43:24Z","timestamp":1776883404004,"version":"3.51.2"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032079589","type":"print"},{"value":"9783032079596","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-07959-6_1","type":"book-chapter","created":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T09:22:04Z","timestamp":1760260924000},"page":"3-12","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["In-Domain SSL Pre-training and\u00a0Streaming ASR: Application to\u00a0Air Traffic Control Communications"],"prefix":"10.1007","author":[{"given":"Jarod","family":"Duret","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Salima","family":"Mdhaffar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ga\u00eblle","family":"Laperri\u00e8re","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ryan","family":"Whetten","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Audrey","family":"Galametz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Catherine","family":"Kobus","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marion-C\u00e9cile","family":"Martin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jo","family":"Oleiwan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yannick","family":"Est\u00e8ve","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,13]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Babu, A., et\u00a0al.: Xls-r: Self-supervised cross-lingual speech representation learning at scale. arXiv preprint arXiv:2111.09296 (2021)","DOI":"10.21437\/Interspeech.2022-143"},{"key":"1_CR2","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. Adv. Neural Inf. Process. Syst. 33, 12449\u201312460 (2020)"},{"key":"1_CR3","unstructured":"Barrault, L., et\u00a0al.: Seamless: multilingual expressive and streaming speech translation. arXiv preprint arXiv:2312.05187 (2023)"},{"issue":"6","key":"1_CR4","doi-asserted-by":"publisher","first-page":"1505","DOI":"10.1109\/JSTSP.2022.3188113","volume":"16","author":"S Chen","year":"2022","unstructured":"Chen, S., et al.: Wavlm: large-scale self-supervised pre-training for full stack speech processing. IEEE J. Sel. Topics Signal Process. 16(6), 1505\u20131518 (2022)","journal-title":"IEEE J. Sel. Topics Signal Process."},{"key":"1_CR5","unstructured":"Chiu, C.C., Qin, J., Zhang, Y., Yu, J., Wu, Y.: Self-supervised learning with random-projection quantizer for speech recognition. In: International Conference on Machine Learning, pp. 3915\u20133924. PMLR (2022)"},{"key":"1_CR6","doi-asserted-by":"crossref","unstructured":"Chung, Y.A., et al.: W2v-BERT: combining contrastive learning and masked language modeling for self-supervised speech pre-training. In: 2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 244\u2013250. IEEE (2021)","DOI":"10.1109\/ASRU51503.2021.9688253"},{"key":"1_CR7","unstructured":"Delpech, E., et al.: A real-life, French-accented corpus of air traffic control communications. In: Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018). European Language Resources Association (ELRA), Miyazaki, Japan (2018). https:\/\/aclanthology.org\/L18-1453\/"},{"key":"1_CR8","unstructured":"Gomez, J.P.Z., et\u00a0al.: ATCO2 corpus: a large-scale dataset for research on automatic speech recognition and natural language understanding of Air Traffic Control communications. J. Data-centric Mach. Learn. Res. (2024)"},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"Helmke, H., Ohneiser, O., M\u00fchlhausen, T., Wies, M.: Reducing controller workload with automatic speech recognition. In: 2016 IEEE\/AIAA 35th Digital Avionics Systems Conference (DASC), pp. 1\u201310. IEEE (2016)","DOI":"10.1109\/DASC.2016.7778024"},{"key":"1_CR10","unstructured":"Hofbauer, K., Petrik, S., Hering, H.: The atcosim corpus of non-prompted clean air traffic control speech. In: LREC. Citeseer (2008)"},{"key":"1_CR11","doi-asserted-by":"publisher","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","volume":"29","author":"WN Hsu","year":"2021","unstructured":"Hsu, W.N., Bolte, B., Tsai, Y.H.H., Lakhotia, K., Salakhutdinov, R., Mohamed, A.: HuBERT: self-supervised speech representation learning by masked prediction of hidden units. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 3451\u20133460 (2021)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"Kanagawa, H., Ijima, Y.: Knowledge distillation from self-supervised representation learning model with discrete speech units for any-to-any streaming voice conversion. In: Proceedings of Interspeech 2024, pp. 4393\u20134397 (2024)","DOI":"10.21437\/Interspeech.2024-924"},{"key":"1_CR13","doi-asserted-by":"crossref","unstructured":"Li, X., Huybrechts, G., Ronanki, S., Farris, J., Bodapati, S.: Dynamic chunk convolution for unified streaming and non-streaming conformer asr. In: ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.\u00a01\u20135. IEEE (2023)","DOI":"10.1109\/ICASSP49357.2023.10097062"},{"key":"1_CR14","doi-asserted-by":"crossref","unstructured":"Pellegrini, T., Farinas, J., Delpech, E., Lancelot, F.: The Airbus Air Traffic Control speech recognition 2018 challenge: towards ATC automatic transcription and call sign detection. In: 20th Annual Conference of the International Speech Communication Association (INTERSPEECH 2019), pp. 2993\u20132997 (2019)","DOI":"10.21437\/Interspeech.2019-1962"},{"key":"1_CR15","unstructured":"Pratap, V., et al.: Scaling speech technology to 1,000+ languages. arXiv (2023)"},{"key":"1_CR16","unstructured":"Ravanelli, M., et\u00a0al.: Open-source conversational AI with Speechbrain\u00a01.0. J. Mach. Learn. Res. 25(333), 1\u201311 (2024)"},{"key":"1_CR17","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1007\/s10579-019-09449-5","volume":"53","author":"L \u0160m\u00eddl","year":"2019","unstructured":"\u0160m\u00eddl, L., \u0160vec, J., Tihelka, D., Matou\u0161ek, J., Romportl, J., Ircing, P.: Air traffic control communication (atcc) speech corpora and their use for asr and tts development. Lang. Resour. Eval. 53, 449\u2013464 (2019)","journal-title":"Lang. Resour. Eval."},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Whetten, R., Kennington, C.: Open implementation and study of BEST-RQ for speech processing. In: IEEE ICASSP 2024 Workshop on Self-Supervision in Audio, Speech and Beyond (SASB 2024) (2024)","DOI":"10.1109\/ICASSPW62465.2024.10626364"},{"key":"1_CR19","unstructured":"Zhang, B., et al.: Unified streaming and non-streaming two-pass end-to-end model for speech recognition. arXiv preprint arXiv:2012.05481 (2020)"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Zuluaga-Gomez, J., et al.: How does pre-trained wav2vec 2.0 perform on domain-shifted ASR? An extensive benchmark on Air Traffic Control communications. In: 2022 IEEE Spoken Language Technology Workshop (SLT), pp. 205\u2013212. IEEE (2023)","DOI":"10.1109\/SLT54892.2023.10022724"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-07959-6_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T09:22:12Z","timestamp":1760260932000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-07959-6_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,13]]},"ISBN":["9783032079589","9783032079596"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-07959-6_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,13]]},"assertion":[{"value":"13 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Szeged","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hungary","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/specom.inf.u-szeged.hu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}