{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T09:03:21Z","timestamp":1726045401437},"publisher-location":"Singapore","reference-count":43,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811394423"},{"type":"electronic","value":"9789811394430"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-981-13-9443-0_19","type":"book-chapter","created":{"date-parts":[[2019,9,24]],"date-time":"2019-09-24T09:03:11Z","timestamp":1569315791000},"page":"221-235","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Enabling Spoken Dialogue Systems for Low-Resourced Languages\u2014End-to-End Dialect Recognition for North Sami"],"prefix":"10.1007","author":[{"given":"Trung Ngo","family":"Trong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kristiina","family":"Jokinen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ville","family":"Hautam\u00e4ki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,9,25]]},"reference":[{"key":"19_CR1","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid O, Mohamed A-R (2014) Convolutional neural networks for speech recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing 22:1533\u20131545","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"19_CR2","unstructured":"Amodei D, Anubhai R, Battenberg E et al (2015) Deep speech 2: End-to-end speech recognition in English and Mandarin. CoRR, vol. abs\/1512.02595"},{"key":"19_CR3","unstructured":"Bahdanau D, Chorowski J, Serdyuk D, Brakel P, Bengio Y (2015) End-to-end attention-based large vocabulary speech recognition. CoRR, vol. abs\/1508.04395"},{"issue":"1","key":"19_CR4","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1109\/TASLP.2015.2489558","volume":"24","author":"H Behravan","year":"2016","unstructured":"Behravan H, Hautam\u00e4ki V, Siniscalchi SM, Kinnunen T, Lee C-H (2016) I-vector modeling of speech attributes for automatic foreign accent recognition. Audio, Speech, Lang Process, IEEE\/ACM Trans 24(1):29\u201341","journal-title":"Audio, Speech, Lang Process, IEEE\/ACM Trans"},{"key":"19_CR5","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.specom.2013.07.008","volume":"56","author":"L Besacier","year":"2014","unstructured":"Besacier L, Barnard E, Karpov A, Schultz T (2014) Automatic speech recognition for under-resourced languages: a survey. Speech Commun 56:85\u2013100","journal-title":"Speech Commun"},{"key":"19_CR6","unstructured":"Crystal D (2000). English as a global language. Cambridge"},{"key":"19_CR7","unstructured":"Dalyac A, Shanahan M, Kelly J (2014). Tackling class imbalance with deep convolutional neural networks. Thesis, Imperial College London"},{"issue":"4","key":"19_CR8","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2011","unstructured":"Dehak N, Kenny P, Dehak R, Dumouchel P, Ouellet P (2011) Front-end factor analysis for speaker verification. IEEE Trans Audio Speech Lang Process 19(4):788\u2013798","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"19_CR9","doi-asserted-by":"crossref","unstructured":"Ganapathy S, Han K, Thomas S et al (2014) Robust language identification using convolutional neural network features. In: Proceedings of the fifteenth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2014-419"},{"key":"19_CR10","doi-asserted-by":"crossref","unstructured":"Glas DF, Minato T, Ishi CT, Kawahara T, Ishiguro H (2016) Erica: the erato intelligent conversational android. In: 2016 25th IEEE international symposium on robot and human interactive communication (RO-MAN). IEEE, pp 22\u201329","DOI":"10.1109\/ROMAN.2016.7745086"},{"key":"19_CR11","doi-asserted-by":"crossref","unstructured":"Gonzalez-Dominguez J, Lopez-Moreno I, Sak H (2014) Automatic language identification using long short-term memory recurrent neural networks. Interspeech","DOI":"10.1109\/ICASSP.2014.6854622"},{"key":"19_CR12","unstructured":"Hiovain K, Jokinen K (2016) Acoustic features of different types of laughter in north sami conversational speech. In: Proceedings of the LREC Workshop Just talking\u2014casual talk among humans and machines, Portoro\u017e, Slovenia"},{"key":"19_CR13","unstructured":"Jokinen K (2014) Open-domain interaction and online content in the sami language. In: Proceedings of the language resources and evaluation conference (LREC 2014)"},{"key":"19_CR14","doi-asserted-by":"crossref","unstructured":"Jokinen K, Trong TN, Hautam\u00e4ki V (2016) Variation in Spoken North Sami Language. Interspeech-2016, pp. 3299\u20133303","DOI":"10.21437\/Interspeech.2016-1438"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Jokinen K, Hiovain K, Laxstr\u00f6m N, Rauhala I, Wilcock G (2017) DigiSami and digital natives: Interaction technology for the north sami language. In: Jokinen K, Wilcock G (eds) Dialogues with social robots. Springer, pp 3\u201319","DOI":"10.1007\/978-981-10-2585-3_1"},{"key":"19_CR16","unstructured":"Jokinen K, Wilcock G (2013) Multimodal open-domain conversations with the Nao robot. In: Natural interaction with robots, knowbots and smartphones: putting spoken dialogue systems into practice. Springer, pp 213\u2013224"},{"key":"19_CR17","unstructured":"Jokinen K, Wilcock G (2014) Community-based resource building and data collection. In: Proceedings of the 4th international workshop on spoken language technologies for under-resourced languages (SLTU\u201914). St Petersburg, Russia, pp 201\u2013206"},{"issue":"13","key":"19_CR18","doi-asserted-by":"publisher","first-page":"3521","DOI":"10.1073\/pnas.1611835114","volume":"114","author":"K Kirkpatrick","year":"2017","unstructured":"Kirkpatrick K, Pascanu R, Rabinowitz NC et al (2017) Overcoming catastrophic forgetting in neural networks. Proc Natl Acad Sci USA 114(13):3521\u20133526","journal-title":"Proc Natl Acad Sci USA"},{"issue":"7553","key":"19_CR19","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Deep learning. Nature 521(7553):436\u2013444","journal-title":"Nature"},{"key":"19_CR20","first-page":"9","volume-title":"Efficient Back-Prop","author":"Y LeCun","year":"1998","unstructured":"LeCun Y, Bottou L, Orr GB, M\u00fcller KR (1998) Efficient Back-Prop. Springer, Berlin Heidelberg, Berlin, Heidelberg, pp 9\u201350"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Lee KA, Li H, Deng L, Hautam\u00e4ki V et al (2016) The 2015 NIST language recognition evaluation: The shared view of i2r, fantastic4 and singams. Interspeech","DOI":"10.21437\/Interspeech.2016-624"},{"key":"19_CR22","unstructured":"Leinonen J (2015) Automatic speech recognition for human-robot interaction using an under-resourced language. Master\u2019s thesis, Aalto University, School of Electrical Engineering, Department of Signal Processing and Acoustics, Espoo"},{"issue":"5","key":"19_CR23","doi-asserted-by":"publisher","first-page":"1136","DOI":"10.1109\/JPROC.2012.2237151","volume":"101","author":"H Li","year":"2013","unstructured":"Li H, Ma B, Lee KA (2013) Spoken language recognition: From fundamentals to practice. Proc IEEE 101(5):1136\u20131159","journal-title":"Proc IEEE"},{"key":"19_CR24","doi-asserted-by":"crossref","unstructured":"Lopez-Moreno I, Gonzalez-Dominguez J, Plchot O (2014) Automatic language identification using deep neural networks. ICASSP","DOI":"10.1109\/ICASSP.2014.6854622"},{"key":"19_CR25","doi-asserted-by":"crossref","unstructured":"Matrouf D, Scheffer N, Fauve BGB, Bonastre J-F (2007) A straightforward and efficient implementation of the factor analysis model for speaker verification. Interspeech, pp 1242\u20131245","DOI":"10.21437\/Interspeech.2007-156"},{"key":"19_CR26","unstructured":"Mi H, Wang Z, Ittycheriah A (2016) Supervised attentions for neural machine translation, CoRR, vol. abs\/1608.00112"},{"key":"19_CR27","unstructured":"Morin F, Bengio Y (2005) Hierarchical probabilistic neural network language model. AIS-TATS05, pp 246\u2013252"},{"key":"19_CR28","unstructured":"Prechelt L (2012) Neural Networks: Tricks of the Trade, 2nd edn. Chapter \u201cEarly Stopping\u2014But When?\u201d. Springer, Berlin, Heidelberg, pp 53\u201367"},{"key":"19_CR29","unstructured":"Radford A, Metz L, Chintala S (2015) Unsupervised representation learning with deep convolutional generative adversarial networks. CoRR, vol. abs\/1511.06434"},{"key":"19_CR30","doi-asserted-by":"crossref","unstructured":"Richardson F, Reynolds DA, Dehak N (2015) A unified deep neural network for speaker and language recognition. CoRR, vol. abs\/1504.00923","DOI":"10.21437\/Interspeech.2015-299"},{"key":"19_CR31","unstructured":"Sainath TN, Kingsbury B, Saon G, Soltau H, Mohamed A, Dahl G, Ramabhadran B (2014) Deep convolutional neural networks for large-scale speech tasks. Neural Netw, pp 1\u201310"},{"key":"19_CR32","doi-asserted-by":"crossref","unstructured":"Sainath T, Vinyals O, Senior A, Sak H (2015) Convolutional, long short-term memory, fully connected deep neural networks. ICASSP, pp 4580\u20134584","DOI":"10.1109\/ICASSP.2015.7178838"},{"key":"19_CR33","doi-asserted-by":"crossref","unstructured":"Thomas S, Seltzer ML, Church K, Hermansky H (2013) Deep neural network features and semi-supervised training for low resource speech recognition. In: IEEE international conference on acoustics, speech and signal processing, pp 6704\u20136708","DOI":"10.1109\/ICASSP.2013.6638959"},{"key":"19_CR34","volume-title":"Deep Language: a comprehensive deep learning approach to end-to-end language recognition","author":"TN Trong","year":"2016","unstructured":"Trong TN, Hautam\u00e4ki V, Lee KA (2016) Deep Language: a comprehensive deep learning approach to end-to-end language recognition. Speaker Odyssey, Bilbao, Spain"},{"key":"19_CR35","unstructured":"Trong TN, Hiovain K, Jokinen K (2016) Laughing and co-construction of common ground in human conversations. The 4th European and 7th Nordic symposium on multimodal communication, Copenhagen, Denmark"},{"key":"19_CR36","doi-asserted-by":"crossref","unstructured":"Wilcock G, Jokinen K (2014) Advances in Wikipedia-based Interaction with Robots. In: Proceedings of the ICMI workshop on multi-modal, multi-party, real-world human-robot interaction, pp 13\u201318","DOI":"10.1145\/2666499.2666503"},{"key":"19_CR37","doi-asserted-by":"crossref","unstructured":"Wilcock G, Jokinen K (2015) Multilingual WikiTalk: Wikipedia-based talking robots that switch languages. In: Proceedings of the SIGDial conference, pp 162\u2013164","DOI":"10.18653\/v1\/W15-4623"},{"key":"19_CR38","unstructured":"Wilcock G, Laxstr\u00f6m N, Leinonen J, Smit P, Kurimo M, Jokinen K (2016) Towards SamiTalk: A sami-speaking robot linked to sami wikipedia. In: Jokinen K, Wilcock G (eds) Dialogues with Social Robots. Springer, pp 343\u2013351"},{"key":"19_CR39","doi-asserted-by":"crossref","unstructured":"Wilcock G, Jokinen K (2013) WikiTalk human-robot interactions. In: Proceedings of the 15th ACM international conference on multimodal interaction (ICMI), pp 73\u201374","DOI":"10.1145\/2522848.2531753"},{"key":"19_CR40","unstructured":"Xu K, Ba J, Kiros R, Cho K et al (2015) Show, Attend and tell: Neural image caption generation with visual attention. In: Proceedings of the 32nd international conference on machine learning, pp 2048\u20132057"},{"key":"19_CR41","doi-asserted-by":"crossref","unstructured":"Zhang S, Qin Y (2013) Semi-supervised accent detection and modelling. In: IEEE international conference on acoustics, speech and signal processing, pp 7175\u20137179","DOI":"10.1109\/ICASSP.2013.6639055"},{"key":"19_CR42","unstructured":"Zhang Z, Bengio S, Hardt M, Recht B, Vinyals O (2016) Understanding deep learning requires rethinking generalization"},{"key":"19_CR43","doi-asserted-by":"crossref","unstructured":"\u00d3 Laoire M (2008) Indigenous language revitalization and globalization. Te Kaharoa 1","DOI":"10.24135\/tekaharoa.v1i1.143"}],"container-title":["Lecture Notes in Electrical Engineering","9th International Workshop on Spoken Dialogue System Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-13-9443-0_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,20]],"date-time":"2023-09-20T18:17:23Z","timestamp":1695233843000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-981-13-9443-0_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9789811394423","9789811394430"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-981-13-9443-0_19","relation":{},"ISSN":["1876-1100","1876-1119"],"issn-type":[{"type":"print","value":"1876-1100"},{"type":"electronic","value":"1876-1119"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"25 September 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}