{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:33:23Z","timestamp":1773318803897,"version":"3.50.1"},"reference-count":79,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,4,8]],"date-time":"2021-04-08T00:00:00Z","timestamp":1617840000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,4,8]],"date-time":"2021-04-08T00:00:00Z","timestamp":1617840000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Reliable Intell Environ"],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s40860-021-00140-7","type":"journal-article","created":{"date-parts":[[2021,4,8]],"date-time":"2021-04-08T16:30:02Z","timestamp":1617899402000},"page":"117-132","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["An exploration of semi-supervised and language-adversarial transfer learning using hybrid acoustic model for hindi speech recognition"],"prefix":"10.1007","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4713-5047","authenticated-orcid":false,"given":"Ankit","family":"Kumar","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rajesh Kumar","family":"Aggarwal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,4,8]]},"reference":[{"issue":"3","key":"140_CR1","doi-asserted-by":"publisher","first-page":"1457","DOI":"10.1007\/s11235-011-9623-0","volume":"52","author":"RK Aggarwal","year":"2013","unstructured":"Aggarwal RK, Dave M (2013) Performance evaluation of sequentially combined heterogeneous feature streams for Hindi speech recognition system. Telecommun Syst 52(3):1457\u20131466","journal-title":"Telecommun Syst"},{"key":"140_CR2","doi-asserted-by":"crossref","unstructured":"Alum\u00e4e T, Tsakalidis S, Schwartz RM (2016) Improved multilingual training of stacked neural network acoustic models for low resource languages. In: Interspeech, pp 3883\u20133887","DOI":"10.21437\/Interspeech.2016-1426"},{"key":"140_CR3","doi-asserted-by":"crossref","unstructured":"Barker J, Watanabe S, Vincent E, Trmal J (2018) The fifth\u2019chime\u2019speech separation and recognition challenge: dataset, task and baselines. arXiv preprint arXiv:1803.10609","DOI":"10.21437\/Interspeech.2018-1768"},{"key":"140_CR4","doi-asserted-by":"crossref","unstructured":"Biswas A, Menon R, van\u00a0der Westhuizen E, Niesler T (2019) Improved low-resource somali speech recognition by semi-supervised acoustic and language model training. arXiv preprint arXiv:1907.03064","DOI":"10.21437\/Interspeech.2019-1328"},{"key":"140_CR5","doi-asserted-by":"crossref","unstructured":"Biswas A, de\u00a0Wet F, van\u00a0der Westhuizen E, Yilmaz E, Niesler T (2018) Multilingual neural network acoustic modelling for ASR of under-resourced English-isizulu code-switched speech. In: Interspeech, pp 2603\u20132607","DOI":"10.21437\/Interspeech.2018-1711"},{"key":"140_CR6","doi-asserted-by":"crossref","unstructured":"Chellapriyadharshini M, Toffy A, Ramasubramanian V et\u00a0al (2018) Semi-supervised and active-learning scenarios: Efficient acoustic model refinement for a low resource indian language. arXiv preprint arXiv:1810.06635","DOI":"10.21437\/Interspeech.2018-2486"},{"issue":"3","key":"140_CR7","first-page":"501","volume":"26","author":"NF Chen","year":"2017","unstructured":"Chen NF, Lim BP, Hasegawa-Johnson MA et al (2017) Multitask learning for phone recognition of underresourced languages using mismatched transcription. IEEE\/ACM Trans Audio Speech Lang Process 26(3):501\u2013514","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"140_CR8","doi-asserted-by":"crossref","unstructured":"Chen NF, Lim BP, Ni C, Xu H, HasegawaJohnson M, Chen W, Xiao X, Sivadas S, Chng ES, Ma B et\u00a0al (2017) Low-resource spoken keyword search strategies in georgian inspired by distinctive feature theory. In: 2017 Asia-Pacific signal and information processing association annual summit and conference (APSIPA ASC), pp 1322\u20131327. IEEE","DOI":"10.1109\/APSIPA.2017.8282237"},{"key":"140_CR9","doi-asserted-by":"crossref","unstructured":"Chen X, Shi Z, Qiu X, Huang X (2017) Adversarial multi-criteria learning for chinese word segmentation. arXiv preprint arXiv:1704.07556","DOI":"10.18653\/v1\/P17-1110"},{"key":"140_CR10","doi-asserted-by":"crossref","unstructured":"Cho K, Van\u00a0Merri\u00ebnboer B, Bahdanau D, Bengio Y (2014) On the properties of neural machine translation: encoder\u2013decoder approaches. arXiv preprint arXiv:1409.1259","DOI":"10.3115\/v1\/W14-4012"},{"key":"140_CR11","doi-asserted-by":"crossref","unstructured":"Chu SM, Povey D, Kuo HK, Mangu L, Zhang S, Shi Q, Qin Y (2010) The 2009 ibm gale mandarin broadcast transcription system. In: 2010 IEEE international conference on acoustics, speech and signal processing, pp 4374\u20134377. IEEE","DOI":"10.1109\/ICASSP.2010.5495639"},{"key":"140_CR12","doi-asserted-by":"crossref","unstructured":"Cui J, Kingsbury B, Ramabhadran B, Sethy A, Audhkhasi K, Cui X, Kislal E, Mangu L, Nussbaum-Thom M, Picheny M et\u00a0al (2015) Multilingual representations for low resource speech recognition and keyword search. In: 2015 IEEE workshop on automatic speech recognition and understanding (ASRU), pp 259\u2013266. IEEE","DOI":"10.1109\/ASRU.2015.7404803"},{"issue":"1","key":"140_CR13","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"GE Dahl","year":"2011","unstructured":"Dahl GE, Yu D, Deng L, Acero A (2011) Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition. IEEE Trans Audio Speech Language Process 20(1):30\u201342","journal-title":"IEEE Trans Audio Speech Language Process"},{"key":"140_CR14","doi-asserted-by":"crossref","unstructured":"Dash D, Kim MJ, Teplansky K, Wang J (2018) Automatic speech recognition with articulatory information and a unified dictionary for Hindi, Marathi, Bengali and Oriya. In: Interspeech, pp 1046\u20131050","DOI":"10.21437\/Interspeech.2018-2122"},{"issue":"1","key":"140_CR15","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1515\/jisys-2017-0618","volume":"29","author":"M Dua","year":"2018","unstructured":"Dua M, Aggarwal RK, Biswas M (2018) Discriminative training using noise robust integrated features and refined hmm modeling. J Intell Syst 29(1):327\u2013344","journal-title":"J Intell Syst"},{"issue":"10","key":"140_CR16","doi-asserted-by":"publisher","first-page":"6747","DOI":"10.1007\/s00521-018-3499-9","volume":"31","author":"M Dua","year":"2019","unstructured":"Dua M, Aggarwal RK, Biswas M (2019) Discriminatively trained continuous Hindi speech recognition system using interpolated recurrent neural network language modeling. Neural Comput Appl 31(10):6747\u20136755","journal-title":"Neural Comput Appl"},{"key":"140_CR17","doi-asserted-by":"crossref","unstructured":"Fathima N, Patel T, Mahima C, Iyengar A (2018) Tdnn-based multilingual speech recognition system for low resource Indian languages. In: Interspeech, pp 3197\u20133201","DOI":"10.21437\/Interspeech.2018-2117"},{"key":"140_CR18","unstructured":"Gales MJ, Knill KM, Ragni A, Rath SP (2014) Speech recognition and keyword spotting for low-resource languages: Babel project research at cued. In: Fourth International workshop on spoken language technologies for under-resourced languages (SLTU-2014), pp 16\u201323. International Speech Communication Association (ISCA)"},{"key":"140_CR19","unstructured":"Ganin Y, Lempitsky V (2015) Unsupervised domain adaptation by backpropagation. In: International conference on machine learning, pp 1180\u20131189"},{"issue":"1","key":"140_CR20","first-page":"2026","volume":"17","author":"Y Ganin","year":"2016","unstructured":"Ganin Y, Ustinova E, Ajakan H, Germain P, Larochelle H, Laviolette F, Marchand M, Lempitsky V (2016) Domain-adversarial training of neural networks. J Mach Learn Res 17(1):2026\u20132030","journal-title":"J Mach Learn Res"},{"key":"140_CR21","doi-asserted-by":"crossref","unstructured":"Ghoshal A, Swietojanski P, Renals S (2013) Multilingual training of deep neural networks. In: 2013 IEEE international conference on acoustics, speech and signal processing, pp 7319\u20137323. IEEE","DOI":"10.1109\/ICASSP.2013.6639084"},{"key":"140_CR22","doi-asserted-by":"crossref","unstructured":"Gr\u00e9zl F, Karafiat M, Janda M (2011) Study of probabilistic and bottle-neck features in multilingual environment. In: 2011 IEEE workshop on automatic speech recognition & understanding, pp 359\u2013364. IEEE","DOI":"10.1109\/ASRU.2011.6163958"},{"key":"140_CR23","unstructured":"Hain T, Woodland P, Evermann G, Povey D (2000) The CU-HTK march 2000 hub5e transcription system. In: Proc. speech transcription workshop, vol\u00a01. Citeseer"},{"key":"140_CR24","doi-asserted-by":"crossref","unstructured":"Hartmann W, Hsiao R, Tsakalidis S (2017) Alternative networks for monolingual bottleneck features. In: 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 5290\u20135294. IEEE","DOI":"10.1109\/ICASSP.2017.7953166"},{"key":"140_CR25","doi-asserted-by":"crossref","unstructured":"Heigold G, Vanhoucke V, Senior A, Nguyen P, Ranzato M, Devin M, Dean J (2013) Multilingual acoustic models using distributed deep neural networks. In: 2013 IEEE international conference on acoustics, speech and signal processing, pp 8619\u20138623. IEEE","DOI":"10.1109\/ICASSP.2013.6639348"},{"key":"140_CR26","doi-asserted-by":"crossref","unstructured":"Hernandez F, Nguyen V, Ghannay S, Tomashenko N, Est\u00e8ve Y (2018) Ted-lium 3: twice as much data and corpus repartition for experiments on speaker adaptation. In: International conference on speech and computer, pp 198\u2013208. Springer","DOI":"10.1007\/978-3-319-99579-3_21"},{"issue":"6","key":"140_CR27","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"Hinton G, Deng L, Yu D, Dahl GE, Mohamed A, Jaitly N, Senior A, Vanhoucke V, Nguyen P, Sainath TN et al (2012) Deep neural networks for acoustic modeling in speech recognition: the shared views of four research groups. IEEE Signal Process Mag 29(6):82\u201397","journal-title":"IEEE Signal Process Mag"},{"issue":"8","key":"140_CR28","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"140_CR29","doi-asserted-by":"crossref","unstructured":"Hoshen Y, Weiss RJ, Wilson KW (2015) Speech acoustic modeling from raw multichannel waveforms. In: 2015 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 4624\u20134628. IEEE","DOI":"10.1109\/ICASSP.2015.7178847"},{"key":"140_CR30","doi-asserted-by":"crossref","unstructured":"Huang JT, Li J, Yu D, Deng L, Gong Y (2013) Cross-language knowledge transfer using multilingual deep neural network with shared hidden layers. In: 2013 IEEE international conference on acoustics, speech and signal processing, pp 7304\u20137308. IEEE","DOI":"10.1109\/ICASSP.2013.6639081"},{"issue":"12","key":"140_CR31","first-page":"23","volume":"8","author":"JW Jung","year":"2018","unstructured":"Jung JW, Heo HS, Yang IH, Shim HJ, Yu HJ (2018) Avoiding speaker overfitting in end-to-end DNNS using raw waveform for text-independent speaker verification. Extraction 8(12):23\u201324","journal-title":"Extraction"},{"issue":"5","key":"140_CR32","doi-asserted-by":"publisher","first-page":"673","DOI":"10.1080\/03772063.2017.1369370","volume":"64","author":"V Kadyan","year":"2018","unstructured":"Kadyan V, Mantri A, Aggarwal R (2018) Refinement of HMM model parameters for Punjabi automatic speech recognition (PASR) system. IETE J Res 64(5):673\u2013688","journal-title":"IETE J Res"},{"key":"140_CR33","doi-asserted-by":"crossref","unstructured":"Ko T, Peddinti V, Povey D, Khudanpur S (2015) Audio augmentation for speech recognition. In: Sixteenth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2015-711"},{"key":"140_CR34","doi-asserted-by":"crossref","unstructured":"Kriman S, Beliaev S, Ginsburg B, Huang J, Kuchaiev O, Lavrukhin V, Leary R, Li J, Zhang Y (2020) Quartznet: Deep automatic speech recognition with 1d time-channel separable convolutions. In: ICASSP 2020-2020 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 6124\u20136128. IEEE","DOI":"10.1109\/ICASSP40776.2020.9053889"},{"key":"140_CR35","unstructured":"Lazaridis A, Himawan I, Motlicek P, Mporas I, Garner PN (2016) Investigating cross-lingual multi-level adaptive networks: The importance of the correlation of source and target languages. In: Proceedings of the international workshop on spoken language translation, CONF"},{"issue":"5","key":"140_CR36","doi-asserted-by":"publisher","first-page":"1187","DOI":"10.1109\/JAS.2019.1911693","volume":"6","author":"D Liu","year":"2019","unstructured":"Liu D, Xu J, Zhang P, Yan Y (2019) Investigation of knowledge transfer approaches to improve the acoustic modeling of vietnamese asr system. IEEE\/CAA J Autom Sin 6(5):1187\u20131195","journal-title":"IEEE\/CAA J Autom Sin"},{"key":"140_CR37","first-page":"2237","volume":"13","author":"Y Miao","year":"2013","unstructured":"Miao Y, Metze F (2013) Improving low-resource CD-DNN-HMM using dropout and multilingual DNN training. Interspeech 13:2237\u20132241","journal-title":"Interspeech"},{"key":"140_CR38","doi-asserted-by":"crossref","unstructured":"Ni C, Leung CC, Wang L, Chen NF, Ma B (2017) Efficient methods to train multilingual bottleneck feature extractors for low resource keyword search. In: 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 5650\u20135654. IEEE","DOI":"10.1109\/ICASSP.2017.7953238"},{"key":"140_CR39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-3","volume-title":"Analysis of CNN-based speech recognition system using raw speech as input","author":"D Palaz","year":"2015","unstructured":"Palaz D, Collobert R et al (2015) Analysis of CNN-based speech recognition system using raw speech as input. Tech. rep, Idiap"},{"key":"140_CR40","doi-asserted-by":"crossref","unstructured":"Panayotov V, Chen G, Povey D, Khudanpur S (2015) Librispeech: an ASR corpus based on public domain audio books. In: 2015 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 5206\u20135210. IEEE","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"140_CR41","doi-asserted-by":"crossref","unstructured":"Parcollet T, Morchid M, Linar\u00e8s G, De\u00a0Mori R (2019) Bidirectional quaternion long short-term memory recurrent neural networks for speech recognition. In: ICASSP 2019-2019 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 8519\u20138523. IEEE","DOI":"10.1109\/ICASSP.2019.8683583"},{"key":"140_CR42","unstructured":"Parcollet T, Ravanelli M, Morchid M, Linar\u00e8s G, Trabelsi C, De\u00a0Mori R, Bengio Y (2018) Quaternion recurrent neural networks. arXiv preprint arXiv:1806.04418"},{"issue":"3","key":"140_CR43","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1007\/s10772-018-09584-4","volume":"22","author":"V Passricha","year":"2019","unstructured":"Passricha V, Aggarwal RK (2019) Convolutional support vector machines for speech recognition. Int J Speech Technol 22(3):601\u2013609","journal-title":"Int J Speech Technol"},{"key":"140_CR44","unstructured":"Ravanelli M, Bengio Y (2018) Interpretable convolutional filters with sincnet. arXiv preprint arXiv:1811.09725"},{"issue":"2","key":"140_CR45","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1109\/TETCI.2017.2762739","volume":"2","author":"M Ravanelli","year":"2018","unstructured":"Ravanelli M, Brakel P, Omologo M, Bengio Y (2018) Light gated recurrent units for speech recognition. IEEE Trans Emerg Top Comput Intell 2(2):92\u2013102","journal-title":"IEEE Trans Emerg Top Comput Intell"},{"key":"140_CR46","doi-asserted-by":"crossref","unstructured":"Ravanelli M, Parcollet T, Bengio Y (2019) The Pytorch-Kaldi speech recognition toolkit. In: ICASSP 2019-2019 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 6465\u20136469. IEEE","DOI":"10.1109\/ICASSP.2019.8683713"},{"key":"140_CR47","unstructured":"Ravanelli M (2018) Interpretable convolutional filters with sincnet. arXiv preprint arXiv:1811.09725"},{"key":"140_CR48","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1016\/j.procs.2017.08.003","volume":"112","author":"I Rebai","year":"2017","unstructured":"Rebai I, BenAyed Y, Mahdi W, Lorr\u00e9 JP (2017) Improving speech recognition using data augmentation and acoustic model fusion. Proc Comput Sci 112:316\u2013322","journal-title":"Proc Comput Sci"},{"key":"140_CR49","unstructured":"Roger V, Farinas J, Pinquier J (2020) Deep neural networks for automatic speech processing: a survey from large corpora to limited data. arXiv preprint arXiv:2003.04241"},{"key":"140_CR50","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1016\/j.procs.2016.04.043","volume":"81","author":"R Sahraeian","year":"2016","unstructured":"Sahraeian R, Van Compernolle D (2016) Using weighted model averaging in distributed multilingual DNNS to improve low resource ASR. Proc Comput Sci 81:152\u2013158","journal-title":"Proc Comput Sci"},{"issue":"11","key":"140_CR51","doi-asserted-by":"publisher","first-page":"1991","DOI":"10.1109\/TASLP.2018.2851145","volume":"26","author":"R Sahraeian","year":"2018","unstructured":"Sahraeian R, Van Compernolle D (2018) Cross-entropy training of DNN ensemble acoustic models for low-resource ASR. IEEE\/ACM Trans Audio Speech Lang Process 26(11):1991\u20132001","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"140_CR52","doi-asserted-by":"crossref","unstructured":"Sailor HB, Krishna MVS, Chhabra D, Patil AT, Kamble MR, Patil HA (2018) DA-IICT\/IIITV system for low resource speech recognition challenge 2018. In: Interspeech, pp 3187\u20133191","DOI":"10.21437\/Interspeech.2018-1553"},{"key":"140_CR53","doi-asserted-by":"crossref","unstructured":"Samudravijaya K, Rao P, Agrawal S (2000) Hindi speech database. In: Sixth International conference on spoken language processing","DOI":"10.21437\/ICSLP.2000-847"},{"key":"140_CR54","doi-asserted-by":"crossref","unstructured":"Saon G, Kurata G, Sercu T, Audhkhasi K, Thomas S, Dimitriadis D, Cui X, Ramabhadran B, Picheny M, Lim LL et\u00a0al (2017) English conversational telephone speech recognition by humans and machines. arXiv preprint arXiv:1703.02136","DOI":"10.21437\/Interspeech.2017-405"},{"key":"140_CR55","doi-asserted-by":"crossref","unstructured":"Scanzio S, Laface P, Fissore L, Gemello R, Mana F (2008) On the use of a multilingual neural network front-end. In: Ninth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2008-672"},{"key":"140_CR56","doi-asserted-by":"crossref","unstructured":"Sercu T, Puhrsch C, Kingsbury B, LeCun Y (2016) Very deep multilingual convolutional neural networks for LVCSR. In: 2016 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 4955\u20134959. IEEE","DOI":"10.1109\/ICASSP.2016.7472620"},{"key":"140_CR57","unstructured":"Shangguan Y, Li J, Qiao L, Alvarez R, McGraw I (2019) Optimizing speech recognition for the edge. arXiv preprint arXiv:1909.12408"},{"key":"140_CR58","doi-asserted-by":"crossref","unstructured":"Shetty VM, Sharon RA, Abraham B, Seeram T, Prakash A, Ravi N, Umesh S (2018) Articulatory and stacked bottleneck features for low resource speech recognition. In: Interspeech, pp 3202\u20133206","DOI":"10.21437\/Interspeech.2018-2226"},{"key":"140_CR59","doi-asserted-by":"crossref","unstructured":"Shinohara Y (2016) Adversarial multi-task learning of deep neural networks for robust speech recognition. In: Interspeech, pp 2369\u20132372. San Francisco, CA, USA","DOI":"10.21437\/Interspeech.2016-879"},{"key":"140_CR60","doi-asserted-by":"crossref","unstructured":"Stolcke A (2002) Srilm-an extensible language modeling toolkit. In: Seventh international conference on spoken language processing","DOI":"10.21437\/ICSLP.2002-303"},{"key":"140_CR61","doi-asserted-by":"crossref","unstructured":"Tong S, Garner PN, Bourlard H (2017) An investigation of deep neural networks for multilingual speech recognition training and adaptation. In: Proc. of Interspeech, conf","DOI":"10.21437\/Interspeech.2017-1242"},{"key":"140_CR62","doi-asserted-by":"crossref","unstructured":"Trmal J, Wiesner M, Peddinti V, Zhang X, Ghahremani P, Wang Y, Manohar V, Xu H, Povey D, Khudanpur S (2017) The kaldi openkws system: Improving low resource keyword search. In: Interspeech, pp 3597\u20133601","DOI":"10.21437\/Interspeech.2017-601"},{"key":"140_CR63","doi-asserted-by":"crossref","unstructured":"T\u00fcske Z, Golik P, Schl\u00fcter R, Ney H (2014) Acoustic modeling with deep neural networks using raw time signal for LVCSR. In: Fifteenth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2014-223"},{"key":"140_CR64","doi-asserted-by":"crossref","unstructured":"T\u00fcske Z, Pinto J, Willett D, Schl\u00fcter R (2013) Investigation on cross-and multilingual MLP features under matched and mismatched acoustical conditions. In: 2013 IEEE international conference on acoustics, speech and signal processing, pp 7349\u20137353. IEEE","DOI":"10.1109\/ICASSP.2013.6639090"},{"key":"140_CR65","doi-asserted-by":"crossref","unstructured":"Tzeng E, Hoffman J, Saenko K, Darrell T (2017) Adversarial discriminative domain adaptation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7167\u20137176","DOI":"10.1109\/CVPR.2017.316"},{"key":"140_CR66","doi-asserted-by":"crossref","unstructured":"Vesel\u1ef3 K, Karafi\u00e1t M, Gr\u00e9zl F, Janda M, Egorova E (2012) The language-independent bottleneck features. In: 2012 IEEE spoken language technology workshop (SLT), pp 336\u2013341. IEEE","DOI":"10.1109\/SLT.2012.6424246"},{"key":"140_CR67","doi-asserted-by":"crossref","unstructured":"Vu NT, Schultz T (2013) Multilingual multilayer perceptron for rapid language adaptation between and across language families. In: Interspeech, pp 515\u2013519","DOI":"10.21437\/Interspeech.2013-146"},{"key":"140_CR68","doi-asserted-by":"crossref","unstructured":"Vydana HK, Gurugubelli K, Vegesna VVR, Vuppala AK (2018) An exploration towards joint acoustic modeling for Indian languages: Iiit-h submission for low resource speech recognition challenge for Indian languages, interspeech 2018. In: Interspeech, pp 3192\u20133196","DOI":"10.21437\/Interspeech.2018-1584"},{"key":"140_CR69","doi-asserted-by":"crossref","unstructured":"Wilkinson N, Biswas A, Y\u0131lmaz E, De\u00a0Wet F, van\u00a0der Westhuizen E, Niesler TR (2020) Semi-supervised acoustic modelling for five-lingual code-switched ASR using automatically-segmented soap opera speech. arXiv preprint arXiv:2004.06480","DOI":"10.21437\/Interspeech.2019-1325"},{"key":"140_CR70","doi-asserted-by":"crossref","unstructured":"Xu H, Do VH, Xiao X, Chng ES (2015) A comparative study of BNF and DNN multilingual training on cross-lingual low-resource speech recognition. In: Sixteenth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2015-481"},{"key":"140_CR71","doi-asserted-by":"crossref","unstructured":"Xu H, Li K, Wang Y, Wang J, Kang S, Chen X, Povey D, Khudanpur S (2018) Neural network language modeling with letter-based features and importance sampling. In: 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 6109\u20136113. IEEE","DOI":"10.1109\/ICASSP.2018.8461704"},{"key":"140_CR72","doi-asserted-by":"crossref","unstructured":"Xu H, Su H, Ni C, Xiao X, Huang H, Chng ES, Li H (2016) Semi-supervised and cross-lingual knowledge transfer learnings for DNN hybrid acoustic models under low-resource conditions. In: Interspeech, pp 1315\u20131319","DOI":"10.21437\/Interspeech.2016-1099"},{"issue":"3","key":"140_CR73","doi-asserted-by":"publisher","first-page":"621","DOI":"10.1109\/TASLP.2018.2889606","volume":"27","author":"J Yi","year":"2018","unstructured":"Yi J, Tao J, Wen Z, Bai Y (2018) Language-adversarial transfer learning for low-resource speech recognition. IEEE\/ACM Trans Audio Speech Lang Process 27(3):621\u2013630","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"140_CR74","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1016\/j.procs.2016.04.044","volume":"81","author":"E Y\u0131lmaz","year":"2016","unstructured":"Y\u0131lmaz E, van den Heuvel H, van Leeuwen D (2016) Investigating bilingual deep neural networks for automatic recognition of code-switching Frisian speech. Proc Comput Sci 81:159\u2013166","journal-title":"Proc Comput Sci"},{"key":"140_CR75","unstructured":"Yin W, Kann K, Yu M, Sch\u00fctze H (2017) Comparative study of CNN and RNN for natural language processing. arXiv preprint arXiv:1702.01923"},{"issue":"3","key":"140_CR76","doi-asserted-by":"publisher","first-page":"396","DOI":"10.1109\/JAS.2017.7510508","volume":"4","author":"D Yu","year":"2017","unstructured":"Yu D, Li J (2017) Recent progresses in deep learning based acoustic models. IEEE\/CAA J Autom Sin 4(3):396\u2013409","journal-title":"IEEE\/CAA J Autom Sin"},{"key":"140_CR77","doi-asserted-by":"crossref","unstructured":"Zeghidour N, Usunier N, Synnaeve G, Collobert R, Dupoux E (2018) End-to-end speech recognition from the raw waveform. arXiv preprint arXiv:1806.07098","DOI":"10.21437\/Interspeech.2018-2414"},{"key":"140_CR78","doi-asserted-by":"crossref","unstructured":"Zhang M, Liu Y, Luan H, Sun M (2017) Adversarial training for unsupervised bilingual lexicon induction. In: Proceedings of the 55th annual meeting of the association for computational linguistics (Volume 1: Long Papers), pp 1959\u20131970","DOI":"10.18653\/v1\/P17-1179"},{"key":"140_CR79","doi-asserted-by":"crossref","unstructured":"Zhou S, Zhao Y, Xu S, Xu B et\u00a0al (2017) Multilingual recurrent neural networks with residual learning for low-resource speech recognition","DOI":"10.21437\/Interspeech.2017-111"}],"container-title":["Journal of Reliable Intelligent Environments"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40860-021-00140-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40860-021-00140-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40860-021-00140-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,23]],"date-time":"2022-12-23T20:41:53Z","timestamp":1671828113000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40860-021-00140-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,4,8]]},"references-count":79,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["140"],"URL":"https:\/\/doi.org\/10.1007\/s40860-021-00140-7","relation":{},"ISSN":["2199-4668","2199-4676"],"issn-type":[{"value":"2199-4668","type":"print"},{"value":"2199-4676","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,4,8]]},"assertion":[{"value":"20 August 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 March 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 April 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}