{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,7]],"date-time":"2025-04-07T21:05:03Z","timestamp":1744059903424,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031483110"},{"type":"electronic","value":"9783031483127"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-48312-7_7","type":"book-chapter","created":{"date-parts":[[2023,11,21]],"date-time":"2023-11-21T20:03:21Z","timestamp":1700597001000},"page":"87-99","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Phone Durations Modeling for Livvi-Karelian ASR"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1264-4458","authenticated-orcid":false,"given":"Irina","family":"Kipyatkova","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1196-1117","authenticated-orcid":false,"given":"Ildar","family":"Kagirov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,11,22]]},"reference":[{"issue":"8","key":"7_CR1","doi-asserted-by":"publisher","first-page":"1018","DOI":"10.3390\/sym11081018","volume":"11","author":"D Wang","year":"2019","unstructured":"Wang, D., Wang, X., Lv, S.: An overview of end-to-end automatic speech recognition. Symmetry 11(8), 1018 (2019)","journal-title":"Symmetry"},{"doi-asserted-by":"crossref","unstructured":"Bahdanau, D., Chorowski, J., Serdyuk, D., Brakel, P., Bengio, Y.: End-to-end attention-based large vocabulary speech recognition. In: Proceedings of 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4945\u20134949. The Institute of Electrical and Electronics Engineers (2016)","key":"7_CR2","DOI":"10.1109\/ICASSP.2016.7472618"},{"doi-asserted-by":"crossref","unstructured":"Hori, T., Watanabe, S., Zhang, Y., Chan, W.: Advances in joint CTC-attention based end-to-end speech recognition with a deep CNN encoder and RNN-LM. In: Proceedings of the 18th Annual Conference of the International Speech Communication Association (Interspeech), pp. 949\u2013953. International Speech Communication Association (2017)","key":"7_CR3","DOI":"10.21437\/Interspeech.2017-1296"},{"key":"7_CR4","doi-asserted-by":"publisher","first-page":"73005","DOI":"10.1109\/ACCESS.2020.2988365","volume":"8","author":"X Sun","year":"2020","unstructured":"Sun, X., Yang, Q., Liu, S., Yuan, X.: Improving low-resource speech recognition based on improved NN-HMM structures. IEEE Access 8, 73005\u201373014 (2020)","journal-title":"IEEE Access"},{"doi-asserted-by":"crossref","unstructured":"Karunathilaka, H., Welgama, V., Nadungodage, T., Weerasinghe, R.: Low-resource Sinhala speech recognition using deep learning. In: Proceedings of 20th International Conference on Advances in ICT for Emerging Regions (ICTer), pp. 196\u2013201. The Institute of Electrical and Electronics Engineers (2020)","key":"7_CR5","DOI":"10.1109\/ICTer51097.2020.9325468"},{"unstructured":"Gupta, V., Boulianne, G.: Progress in multilingual speech recognition for low resource languages Kurmanji Kurdish, Cree and Inuktut. In: Proceedings of the 13th Conference on Language Resources and Evaluation (LREC), pp. 6420\u20136428. European Language Resources Association (2022)","key":"7_CR6"},{"doi-asserted-by":"crossref","unstructured":"Biswas, A., Menon, R., van der Westhuizen, E., Niesler, Th.: Improved low-resource Somali speech recognition by semi-supervised acoustic and language model training. In: Proceedings of 20th Annual Conference of the International Speech Communication Association (Interspeech), pp. 3008\u20133012. International Speech Communication Association (2019)","key":"7_CR7","DOI":"10.21437\/Interspeech.2019-1328"},{"doi-asserted-by":"crossref","unstructured":"Pulugundla, B., et al.: BUT system for low resource Indian language ASR. In: Proceedings of 20th Annual Conference of the International Speech Communication Association (Interspeech), pp. 3182\u20133186. International Speech Communication Association (2019)","key":"7_CR8","DOI":"10.21437\/Interspeech.2018-1302"},{"doi-asserted-by":"crossref","unstructured":"Fathima, N., Patel, T., Mahima, C., Iyengar, A.: TDNN-based multilingual speech recognition system for low resource Indian languages. In: Proceedings of 19th Annual Conference of the International Speech Communication Association (Interspeech), pp. 3197\u20133201. International Speech Communication Association (2018)","key":"7_CR9","DOI":"10.21437\/Interspeech.2018-2117"},{"doi-asserted-by":"crossref","unstructured":"Wills, S., Uys, P., van Heerden, C.J., Barnard, E.: Language modeling for speech analytics in under-resourced languages. In: Proceedings of 21st Annual Conference of the International Speech Communication Association (Interspeech), pp. 4941\u20134945. International Speech Communication Association (2020)","key":"7_CR10","DOI":"10.21437\/Interspeech.2020-1586"},{"issue":"3","key":"7_CR11","doi-asserted-by":"publisher","first-page":"517","DOI":"10.1109\/TASLP.2015.2400218","volume":"23","author":"M Sundermeyer","year":"2015","unstructured":"Sundermeyer, M., Ney, H., Schl\u00fcter, R.: From feedforward to recurrent LSTM neural networks for language modeling. IEEE\/ACM Trans. Audio Speech Lang. Process. 23(3), 517\u2013529 (2015)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"7_CR12","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1007\/978-3-030-26061-3_23","volume-title":"Speech and Computer","author":"I Kipyatkova","year":"2019","unstructured":"Kipyatkova, I.: LSTM-based language models for very large vocabulary continuous Russian speech recognition system. In: Salah, A.A., Karpov, A., Potapova, R. (eds.) SPECOM 2019. LNCS (LNAI), vol. 11658, pp. 219\u2013226. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-26061-3_23"},{"key":"7_CR13","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1093\/oso\/9780198767664.003.0020","volume-title":"The Oxford Guide to the Uralic Languages","author":"H Metslang","year":"2022","unstructured":"Metslang, H.: North and standard estonian. In: Bakr\u00f3-Nagy, M., Laakso, J., Skribnik, E. (eds.) The Oxford Guide to the Uralic Languages, pp. 350\u2013366. Oxford Academic, Oxford (2022)"},{"issue":"1","key":"7_CR14","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/j.wocn.2008.08.002","volume":"37","author":"S Nakai","year":"2009","unstructured":"Nakai, S., Kunnari, S., Turk, A., Suomi, K., Ylitalo, R.: Utterance-final lengthening and quantity in Northern Finnish. J. Phon. 37(1), 29\u201345 (2009)","journal-title":"J. Phon."},{"key":"7_CR15","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1159\/000073502","volume":"60","author":"H Traunm\u00fcller","year":"2003","unstructured":"Traunm\u00fcller, H., Krull, D.: The effect of local speaking rate on the perception of quantity in Estonian. Phonetica 60, 187\u2013207 (2003)","journal-title":"Phonetica"},{"issue":"3","key":"7_CR16","doi-asserted-by":"publisher","first-page":"303","DOI":"10.15388\/Informatica.2004.062","volume":"15","author":"T Alum\u00e4e","year":"2004","unstructured":"Alum\u00e4e, T., Vohandu, L.: Limited-vocabulary Estonian continuous speech recognition system using Hidden Markov models. Informatica 15(3), 303\u2013314 (2004)","journal-title":"Informatica"},{"unstructured":"Alum\u00e4e, T.: Recent improvements in Estonian LVCSR. In: Proceedings of 4th Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU 2014), pp. 118\u2013123. European Language Resources association (2014)","key":"7_CR17"},{"issue":"1","key":"7_CR18","first-page":"137","volume":"7","author":"MA Kermanshahi","year":"2019","unstructured":"Kermanshahi, M.A., Homayounpour, M.M.: Improving phoneme sequence recognition using phoneme duration information in DNN-HSMM. J. Artif. Intell. Data Min. 7(1), 137\u2013147 (2019)","journal-title":"J. Artif. Intell. Data Min."},{"doi-asserted-by":"crossref","unstructured":"Qin, Y., Lee, T., Kong, A.P.H., Law, S.P.: Towards automatic assessment of aphasia speech using automatic speech recognition techniques. In: Proceedings of 2016 10th International Symposium on Chinese Spoken Language Processing (ISCSLP), pp. 1\u20134. The Institute of Electrical and Electronics Engineers (2016)","key":"7_CR19","DOI":"10.1109\/ISCSLP.2016.7918445"},{"doi-asserted-by":"publisher","unstructured":"Rosenfelder, I., et al.: FAVE (Forced Alignment and Vowel Extraction) Suite Version 1.1.3. Software. https:\/\/doi.org\/10.5281\/zenodo.9846. Accessed 13 July 2023","key":"7_CR20","DOI":"10.5281\/zenodo.9846"},{"issue":"5","key":"7_CR21","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1109\/LSP.2005.845598","volume":"12","author":"M Johnson","year":"2005","unstructured":"Johnson, M.: Capacity and complexity of HMM duration modeling techniques. IEEE Signal Process. Lett. 12(5), 407\u2013410 (2005)","journal-title":"IEEE Signal Process. Lett."},{"doi-asserted-by":"crossref","unstructured":"Pylkk\u00f6nen, J.: Phone duration modeling techniques in continuous speech recognition. Master\u2019s thesis, Helsinki University of Technology (2004)","key":"7_CR22","DOI":"10.21437\/Interspeech.2004-164"},{"unstructured":"Pylkk\u00f6nen, J., Kurimo, M.: Using phone durations in finnish large vocabulary continuous speech recognition. In: Proceedings of the 6th Nordic Signal Processing Symposium (NORSIG), pp. 324\u2013327. The Institute of Electrical and Electronics Engineers (2005)","key":"7_CR23"},{"unstructured":"VEPKAR. http:\/\/dictorpus.krc.karelia.ru\/en. Accessed 13 July 2023","key":"7_CR24"},{"unstructured":"Kovaleva, S.V., Rodionova, A.P.: Traditional and Innovative in the Vocabulary and Grammar of Karelian (Based on a Socio-Linguistic Research). KarNC RAN Publ., Petrozavodsk (2011). (in Russian)","key":"7_CR25"},{"unstructured":"Sox Toolkit. http:\/\/sox.sourceforge.net\/sox.html. Accessed 13 July 2023","key":"7_CR26"},{"unstructured":"Povey, D., et al.: The Kaldi speech recognition toolkit. In: Proceedings of 2011 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 1\u20134. Institute of Electrical and Electronics Engineers (2011)","key":"7_CR27"},{"doi-asserted-by":"crossref","unstructured":"Saon, G., Soltau, H., Nahamoo, D., Picheny, M.: Speaker adaptation of neural network acoustic models using i-vectors. In: Proceedings of 2013 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 55\u201359. Institute of Electrical and Electronics Engineers (2013)","key":"7_CR28","DOI":"10.1109\/ASRU.2013.6707705"},{"doi-asserted-by":"crossref","unstructured":"Povey, D., et al.: Semi-orthogonal low-rank matrix factorization for deep neural networks. In: Proceedings of 19th Annual Conference of the International Speech Communication Association (Interspeech), pp. 3743\u20133747. International Speech Communication Association (2018)","key":"7_CR29","DOI":"10.21437\/Interspeech.2018-1417"},{"unstructured":"Stolcke, A., Zheng, J., Wang, W., Abrash, V.: SRILM at sixteen: update and outlook. In: Proceedings of 2011 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), p. 5. Institute of Electrical and Electronics Engineers (2011)","key":"7_CR30"},{"doi-asserted-by":"crossref","unstructured":"Enarvi, S., Kurimo, M.: TheanoLM \u2013 an extensible toolkit for neural network language modeling. In: Proceedings of the 17th Annual Conference of the International Speech Communication Association (Interspeech), pp. 3052\u20133056. International Speech Communication Association (2016)","key":"7_CR31","DOI":"10.21437\/Interspeech.2016-618"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-48312-7_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T14:45:01Z","timestamp":1730558701000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-48312-7_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031483110","9783031483127"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-48312-7_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dharwad","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iitdh.ac.in\/specom-2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"174","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"94","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"54% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}