{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:26:05Z","timestamp":1742912765924,"version":"3.40.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030895785"},{"type":"electronic","value":"9783030895792"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-89579-2_7","type":"book-chapter","created":{"date-parts":[[2021,10,16]],"date-time":"2021-10-16T21:08:32Z","timestamp":1634418512000},"page":"73-84","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Augmenting ASR for User-Generated Videos with Semi-supervised Training and Acoustic Model Adaptation for\u00a0Spoken Content Retrieval"],"prefix":"10.1007","author":[{"given":"Yasufumi","family":"Moriya","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gareth J. F.","family":"Jones","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid, O., Jiang, H.: Fast speaker adaptation of hybrid NN\/HMM model for speech recognition based on discriminative learning of speaker code. In: Proceedings of ICASSP 2013, pp. 7942\u20137946 (2013)","DOI":"10.1109\/ICASSP.2013.6639211"},{"key":"7_CR2","doi-asserted-by":"crossref","unstructured":"Hadian, H., Sameti, H., Povey, D., Khudanpur, S.: End-to-end speech recognition using lattice-free mmi. In: Proceedings of Interspeech 2018, pp. 12\u201316 (2018)","DOI":"10.21437\/Interspeech.2018-1423"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Kudo, T., Richardson, J.: SentencePiece: a simple and language independent subword tokenizer and detokenizer for neural text processing. In: Conference on Empirical Methods in Natural Language Processing (EMNLP 2018), pp. 66\u201371 (2018)","DOI":"10.18653\/v1\/D18-2012"},{"issue":"4\u20135","key":"7_CR4","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1561\/1500000020","volume":"4","author":"M Larson","year":"2012","unstructured":"Larson, M., Jones, G.J.F.: Spoken content retrieval: a survey of techniques and technologies. Found. Trends Inf. Retr. 4(4\u20135), 235\u2013422 (2012)","journal-title":"Found. Trends Inf. Retr."},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"L\u00fcscher, C., et al.: RWTH ASR systems for librispeech: hybrid vs attention. In: Proceedings of Interspeech 2019, pp. 231\u2013235 (2019)","DOI":"10.21437\/Interspeech.2019-1780"},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Majumdar, S., Ginsburg, B.: MatchboxNet: 1D Time-channel separable convolutional neural network architecture for speech commands recognition. In: Proceedings of Interspeech 2020, pp. 3356\u20133360 (2020)","DOI":"10.21437\/Interspeech.2020-1058"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Manohar, V., Hadian, H., Povey, D., Khudanpur, S.: Semi-supervised training of acoustic models using lattice-free mmi. In: Proceedings of ICASSP 2018, pp. 4844\u20134848 (2018)","DOI":"10.1109\/ICASSP.2018.8462331"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Narayanan, A., et al.: Toward domain-invariant speech recognition via large scale training. In: Proceedings of SLT 2018, pp. 441\u2013447 (2018)","DOI":"10.1109\/SLT.2018.8639610"},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: LibriSpeech: an ASR corpus based on public domain audio books. In: Proceedings of ICASSP 2015, pp. 5206\u20135210 (2015)","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"7_CR10","unstructured":"Povey, D., et al.: The Kaldi speech recognition toolkit. In: Proceedings of ASRU 2011, pp. 1\u20134 (2011)"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Povey, D., et al.: Purely sequence-trained neural networks for ASR based on lattice-free mmi. In: Proceedings of Interspeech 2016, pp. 2751\u20132755 (2016)","DOI":"10.21437\/Interspeech.2016-595"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Robertson, S.E., Walker, S., Jones, S., Hancock-Beaulieu, M., Gatford, M.: Okapi at TREC-3. In: Proceedings of TREC 3, vol. 500\u2013225, pp. 109\u2013126 (1994)","DOI":"10.6028\/NIST.SP.500-225.routing-city"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Sainath, T.N., et al.: A streaming on-device end-to-end model surpassing server-side conventional model quality and latency. In: Proceedings of ICASSP 2020, pp. 6059\u20136063 (2020)","DOI":"10.1109\/ICASSP40776.2020.9054188"},{"key":"7_CR14","unstructured":"Sanabria, R., et al.: How2: a large-scale dataset for multimodal language understanding. In: Proceedings of the Workshop on Visually Grounded Interaction and Language (ViGIL). NeurIPS (2018)"},{"key":"7_CR15","doi-asserted-by":"crossref","unstructured":"Saon, G., Soltau, H., Nahamoo, D., Picheny, M.: Speaker adaptation of neural network acoustic models using i-vectors. In: Proceedings of ASRU 2013, pp. 55\u201359 (2013)","DOI":"10.1109\/ASRU.2013.6707705"},{"key":"7_CR16","doi-asserted-by":"crossref","unstructured":"Schmiedeke, S., et al.: Blip10000: a social video dataset containing SPUG content for tagging and retrieval. In: Proceedings of ACM MMSys 2013 (2013)","DOI":"10.1145\/2483977.2483988"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Sell, G., Povey, D., Khudanpur, S.: X-vectors: Robust DNN embeddings for speaker recognition. In: Proceedings of ICASSP 2018, pp. 5329\u20135333 (2018)","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Stolcke, A.: SRILM-an extensible language modeling toolkit. In: Proceedings of International Conference on Spoken Language Processing (ICSLP 2002) (2002)","DOI":"10.21437\/ICSLP.2002-303"},{"key":"7_CR19","doi-asserted-by":"crossref","unstructured":"Vesel\u1ef3, K., Hannemann, M., Burget, L.: Semi-supervised training of deep neural networks. In: Proceedings of ASRU 2013, pp. 267\u2013272 (2013)","DOI":"10.1109\/ASRU.2013.6707741"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: Espresso: a fast end-to-end neural speech recognition toolkit. In: Proceedings of ASRU 2019 (2019)","DOI":"10.1109\/ASRU46091.2019.9003968"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Xu, H., et al.: A pruned rnnlm lattice-rescoring algorithm for automatic speech recognition. In: Proceedings of ICASSP 2018, pp. 5929\u20135933 (2018)","DOI":"10.1109\/ICASSP.2018.8461974"},{"issue":"7","key":"7_CR22","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1016\/j.specom.2010.02.014","volume":"52","author":"K Yu","year":"2010","unstructured":"Yu, K., Gales, M., Wang, L., Woodland, P.C.: Unsupervised training and directed manual transcription for LVCSR. Speech Commun. 52(7), 652\u2013663 (2010)","journal-title":"Speech Commun."}],"container-title":["Lecture Notes in Computer Science","Statistical Language and Speech Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-89579-2_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T01:52:16Z","timestamp":1725933136000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-89579-2_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030895785","9783030895792"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-89579-2_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"17 October 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SLSP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Statistical Language and Speech Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Cardiff","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 November 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 November 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"slsp2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/irdta.eu\/slsp2020-2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"43% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}