{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T00:37:03Z","timestamp":1770511023369,"version":"3.49.0"},"reference-count":71,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2020,4,28]],"date-time":"2020-04-28T00:00:00Z","timestamp":1588032000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,4,28]],"date-time":"2020-04-28T00:00:00Z","timestamp":1588032000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100004541","name":"Ministry of Human Resource Development","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004541","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2020,10]]},"DOI":"10.1007\/s00034-020-01408-8","type":"journal-article","created":{"date-parts":[[2020,4,28]],"date-time":"2020-04-28T08:02:37Z","timestamp":1588060957000},"page":"5169-5197","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Unsupervised Speech Signal-to-Symbol Transformation for Language Identification"],"prefix":"10.1007","volume":"39","author":[{"given":"Saurabhchand","family":"Bhati","sequence":"first","affiliation":[]},{"given":"Shekhar","family":"Nayak","sequence":"additional","affiliation":[]},{"given":"Sri Rama Murty","family":"Kodukula","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,4,28]]},"reference":[{"issue":"8","key":"1408_CR1","doi-asserted-by":"crossref","first-page":"649","DOI":"10.1109\/LSP.2004.831666","volume":"11","author":"J Ajmera","year":"2004","unstructured":"J. Ajmera, I. McCowan, H. Bourlard, Robust speaker change detection. IEEE Signal Process. Lett. 11(8), 649\u2013651 (2004)","journal-title":"IEEE Signal Process. Lett."},{"issue":"2","key":"1408_CR2","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1109\/MCAS.2011.941081","volume":"11","author":"E Ambikairajah","year":"2011","unstructured":"E. Ambikairajah, H. Li, L. Wang, B. Yin, V. Sethu, Language identification: a tutorial. IEEE Circuits Syst. Mag. 11(2), 82\u2013108 (2011)","journal-title":"IEEE Circuits Syst. Mag."},{"issue":"1","key":"1408_CR3","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1109\/29.1486","volume":"36","author":"R Andre-Obrecht","year":"1988","unstructured":"R. Andre-Obrecht, A new statistical approach for the automatic segmentation of continuous speech signals. IEEE Trans. Acoust. Speech Signal Process. 36(1), 29\u201340 (1988)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"issue":"2","key":"1408_CR4","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1016\/S0167-6393(99)00033-3","volume":"29","author":"M Bacchiani","year":"1999","unstructured":"M. Bacchiani, M. Ostendorf, Joint lexicon, acoustic unit inventory and model design. Speech Commun. 29(2), 99\u2013114 (1999)","journal-title":"Speech Commun."},{"key":"1408_CR5","unstructured":"M. Bacchiani, M. Ostendorf, Y. Sagisaka, K. Paliwal, Unsupervised learning of non-uniform segmental units for acoustic modeling in speech recognition, in: Proceedings of IEEE ASR Workshop, pp. 141\u2013142 (1995)"},{"key":"1408_CR6","doi-asserted-by":"crossref","unstructured":"S. Bhati, S. Nayak, K.S.R. Murty, Unsupervised speech signal to symbol transformation for zero resource speech applications, in Proceedings of Interspeech, pp. 2133\u20132137 (2017)","DOI":"10.21437\/Interspeech.2017-1476"},{"key":"1408_CR7","doi-asserted-by":"crossref","unstructured":"S. Bhati, S. Nayak, K. Sri Rama\u00a0Murty, Unsupervised segmentation of speech signals using kernel-gram matrices. In: Computer Vision, Pattern Recognition, Image Processing, and Graphics: 6th National Conference, NCVPRIPG, Mandi, India, Revised Selected Papers 6, pp. 139\u2013149. Springer (2017)","DOI":"10.1007\/978-981-13-0020-2_13"},{"issue":"4","key":"1408_CR8","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1016\/0167-6393(93)90083-W","volume":"12","author":"F Brugnara","year":"1993","unstructured":"F. Brugnara, D. Falavigna, M. Omologo, Automatic segmentation and labeling of speech based on hidden Markov models. Speech Commun. 12(4), 357\u2013370 (1993)","journal-title":"Speech Commun."},{"key":"1408_CR9","unstructured":"N. Br\u00fcmmer, E. De\u00a0Villiers, The bosaris toolkit: theory, algorithms and code for surviving the new dcf. arXiv preprint arXiv:1304.2865 (2013)"},{"key":"1408_CR10","doi-asserted-by":"crossref","unstructured":"W. Campbell, T. Gleason, J. Navratil, D. Reynolds, W. Shen, E. Singer, P. Torres-Carrasquillo, Advanced language recognition using cepstra and phonotactics: Mitll system performance on the NIST 2005 language recognition evaluation. In: IEEE Speaker and Language Recognition Workshop, Odyssey, pp. 1\u20138 (2006)","DOI":"10.1109\/ODYSSEY.2006.248097"},{"key":"1408_CR11","unstructured":"S. Chen, P. Gopalakrishnan, Speaker, environment and channel change detection and clustering via the bayesian information criterion, in Proceedings of DARPA broadcast news transcription and understanding workshop, vol.\u00a08, pp. 127\u2013132. Virginia, USA (1998)"},{"key":"1408_CR12","doi-asserted-by":"crossref","unstructured":"R. \u010cmejla, P. Sovka, Estimation of boundaries between speech units using bayesian changepoint detectors, in International Conference on Text, Speech and Dialogue, pp. 291\u2013298. Springer (2001)","DOI":"10.1007\/3-540-44805-5_39"},{"key":"1408_CR13","doi-asserted-by":"crossref","unstructured":"C. Corredor-Ardoy, J.L. Gauvain, M. Adda-Decker, L. Lamel, Language identification with language-independent acoustic models, in Proceedings of Eurospeech. Citeseer (1997)","DOI":"10.21437\/Eurospeech.1997-41"},{"key":"1408_CR14","unstructured":"P. Dai, U. Iurgel, G. Rigoll, A novel feature combination approach for spoken document classification with support vector machines, in Proceedings of Multimedia Information Retrieval Workshop, pp. 1\u20135. Citeseer (2003)"},{"issue":"473","key":"1408_CR15","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1198\/016214505000000745","volume":"101","author":"RA Davis","year":"2006","unstructured":"R.A. Davis, T.C.M. Lee, G.A. Rodriguez-Yam, Structural break estimation for nonstationary time series models. J. Am. Stat. Assoc. 101(473), 223\u2013239 (2006)","journal-title":"J. Am. Stat. Assoc."},{"key":"1408_CR16","doi-asserted-by":"crossref","unstructured":"N. Dehak, P.A. Torres-Carrasquillo, D. Reynolds, R. Dehak, Language recognition via i-vectors and dimensionality reduction, in Proceedings of Interspeech (2011)","DOI":"10.21437\/Interspeech.2011-328"},{"key":"1408_CR17","doi-asserted-by":"crossref","unstructured":"L.F. D\u2019Haro, R. Cordoba, C. Salamea, J.D. Echeverry, Extended phone log-likelihood ratio features and acoustic-based i-vectors for language recognition. In: IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 5342\u20135346 (2014)","DOI":"10.1109\/ICASSP.2014.6854623"},{"issue":"9","key":"1408_CR18","doi-asserted-by":"crossref","first-page":"1073","DOI":"10.1109\/LSP.2014.2324819","volume":"21","author":"M Diez","year":"2014","unstructured":"M. Diez, A. Varona, M. Penagarikano, L.J. Rodriguez-Fuentes, G. Bordel, On the projection of pllrs for unbounded feature distributions in spoken language recognition. IEEE Signal Process. Lett. 21(9), 1073\u20131077 (2014)","journal-title":"IEEE Signal Process. Lett."},{"key":"1408_CR19","doi-asserted-by":"crossref","unstructured":"S. Dusan, L. Rabiner, On the relation between maximum spectral transition positions and phone boundaries. In: Ninth International Conference on Spoken Language Processing (2006)","DOI":"10.21437\/Interspeech.2006-230"},{"key":"1408_CR20","doi-asserted-by":"crossref","unstructured":"I.A. Eckley, P. Fearnhead, R. Killick, Analysis of changepoint models. Bayesian Time Series Models pp. 205\u2013224 (2011)","DOI":"10.1017\/CBO9780511984679.011"},{"key":"1408_CR21","doi-asserted-by":"crossref","unstructured":"Y.P. Estevan, V. Wan, O. Scharenborg, Finding maximum margin segments in speech. In: IEEE International Conference on Acoustics, Speech and Signal Processing, vol.\u00a04, pp. IV\u2013937 (2007)","DOI":"10.1109\/ICASSP.2007.367225"},{"issue":"4","key":"1408_CR22","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1177\/002383096000300401","volume":"3","author":"I Fonagy","year":"1960","unstructured":"I. Fonagy, K. Magdics, Speed of utterance in phrases of different lengths. Lang. Speech 3(4), 179\u2013192 (1960)","journal-title":"Lang. Speech"},{"issue":"3","key":"1408_CR23","doi-asserted-by":"crossref","first-page":"268","DOI":"10.1109\/PROC.1973.9030","volume":"61","author":"GD Forney","year":"1973","unstructured":"G.D. Forney, The viterbi algorithm. Proc. IEEE 61(3), 268\u2013278 (1973)","journal-title":"Proc. IEEE"},{"key":"1408_CR24","unstructured":"J.S. Garofolo, L.F. Lamel, W.M. Fisher, J.G. Fiscus, D.S. Pallett, N.L. Dahlgren, V. Zue, Timit acoustic-phonetic continuous speech corpus. Linguist. Data Consort. 10(5), (1993)"},{"key":"1408_CR25","doi-asserted-by":"crossref","unstructured":"J.L. Gauvain, A. Messaoudi, H. Schwenk, Language recognition using phone latices, in Proceedings of Interspeech (2004)","DOI":"10.21437\/Interspeech.2004-28"},{"key":"1408_CR26","unstructured":"H. Gish, K. Ng, A segmental speech model with applications to word spotting. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, vol. 2, pp. 447\u2013450 (1993)"},{"key":"1408_CR27","doi-asserted-by":"crossref","unstructured":"A.S. Jayram, V. Ramasubramanian, T.V. Sreenivas, Language identification using parallel sub-word recognition. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, vol.\u00a01, pp. I\u201332 (2003)","DOI":"10.21437\/Eurospeech.2003-420"},{"key":"1408_CR28","unstructured":"H. Kasuya, H. Wakita, Speech segmentation and feature normalization based on area functions. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, pp. 29\u201332 (1976)"},{"key":"1408_CR29","doi-asserted-by":"crossref","unstructured":"S. Kesiraju, R. Pappagari, L. Ondel, L. Burget, N. Dehak, S. Khudanpur, J. \u010cernock\u1ef3, S.V. Gangashetty, Topic identification of spoken documents using unsupervised acoustic unit discovery. In: IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 5745\u20135749 (2017)","DOI":"10.1109\/ICASSP.2017.7953257"},{"key":"1408_CR30","doi-asserted-by":"crossref","first-page":"86","DOI":"10.1016\/j.dsp.2014.08.002","volume":"35","author":"V Khanagha","year":"2014","unstructured":"V. Khanagha, K. Daoudi, O. Pont, H. Yahia, Phonetic segmentation of speech signal using local singularity analysis. Digit. Signal Proc. 35, 86\u201394 (2014)","journal-title":"Digit. Signal Proc."},{"key":"1408_CR31","unstructured":"C.H. Lee, F.K. Soong, B.H. Juang, A segment model based approach to speech recognition. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, pp. 501\u2013541 (1988)"},{"key":"1408_CR32","unstructured":"C.Y. Lee, J. Glass, A nonparametric Bayesian approach to acoustic model discovery. In: Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics: Long Papers, vol. 1, pp. 40\u201349 (2012)"},{"key":"1408_CR33","doi-asserted-by":"crossref","unstructured":"S.J. Leow, E.S. Chng, C.H. Lee, Language-resource independent speech segmentation using cues from a spectrogram image. In: IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 5813\u20135817 (2015)","DOI":"10.1109\/ICASSP.2015.7179086"},{"key":"1408_CR34","doi-asserted-by":"crossref","unstructured":"H. Li, B. Ma, A phonotactic language model for spoken language identification. In: Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics, pp. 515\u2013522 (2005)","DOI":"10.3115\/1219840.1219904"},{"issue":"1","key":"1408_CR35","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1109\/TASL.2006.876860","volume":"15","author":"H Li","year":"2007","unstructured":"H. Li, B. Ma, C.H. Lee, A vector space modeling approach to spoken language identification. IEEE Trans. Audio Speech Lang. Process. 15(1), 271\u2013284 (2007)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"5","key":"1408_CR36","doi-asserted-by":"crossref","first-page":"1136","DOI":"10.1109\/JPROC.2012.2237151","volume":"101","author":"H Li","year":"2013","unstructured":"H. Li, B. Ma, K.A. Lee, Spoken language recognition: from fundamentals to practice. Proc. IEEE 101(5), 1136\u20131159 (2013)","journal-title":"Proc. IEEE"},{"key":"1408_CR37","doi-asserted-by":"crossref","unstructured":"B. Ma, C. Guan, H. Li, C.H. Lee, Multilingual speech recognition with language identification, in Proceedings of Interspeech (2002)","DOI":"10.21437\/ICSLP.2002-178"},{"issue":"1","key":"1408_CR38","doi-asserted-by":"crossref","first-page":"100","DOI":"10.1017\/S1351324909005129","volume":"16","author":"C Manning","year":"2010","unstructured":"C. Manning, P. Raghavan, H. Sch\u00fctze, Introduction to information retrieval. Nat. Lang. Eng. 16(1), 100\u2013103 (2010)","journal-title":"Nat. Lang. Eng."},{"key":"1408_CR39","doi-asserted-by":"crossref","unstructured":"A. Martin, G. Doddington, T. Kamm, M. Ordowski, M. Przybocki, The DET curve in assessment of detection task performance (Technical report, DTIC Document, 1997)","DOI":"10.21437\/Eurospeech.1997-504"},{"key":"1408_CR40","unstructured":"K. Mounika, L.H. Sivanand\u00a0Achanta, V.G. Suryakanth, A.K. Vuppala, An investigation of deep neural network architectures for language recognition in Indian languages, in Proceedings of Interspeech, pp. 2930\u20132933 (2016)"},{"key":"1408_CR41","unstructured":"J. Mrozinski, E.W. Whittaker, P. Chatain, S. Furui, Automatic sentence segmentation of speech for automatic summarization. In: IEEE International Conference on Acoustics, Speech and Signal Processing, vol.\u00a01, pp. I\u2013I (2006)"},{"issue":"4","key":"1408_CR42","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1109\/79.317925","volume":"11","author":"YK Muthusamy","year":"1994","unstructured":"Y.K. Muthusamy, E. Barnard, R.A. Cole, Reviewing automatic language identification. IEEE Signal Process. Mag. 11(4), 33\u201341 (1994)","journal-title":"IEEE Signal Process. Mag."},{"key":"1408_CR43","doi-asserted-by":"crossref","unstructured":"A. Park, J.R. Glass, Towards unsupervised pattern discovery in speech. In: IEEE Workshop on Automatic Speech Recognition and Understanding, pp. 53\u201358 (2005)","DOI":"10.1109\/ASRU.2005.1566529"},{"issue":"1","key":"1408_CR44","doi-asserted-by":"crossref","first-page":"186","DOI":"10.1109\/TASL.2007.909282","volume":"16","author":"A Park","year":"2008","unstructured":"A. Park, J.R. Glass, Unsupervised pattern discovery in speech. IEEE Trans. Audio Speech Lang. Process. 16(1), 186\u2013197 (2008)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"1408_CR45","doi-asserted-by":"crossref","unstructured":"Y. Qiao, N. Shimomura, N. Minematsu, Unsupervised optimal phoneme segmentation: objectives, algorithm and comparisons. In: IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 3989\u20133992 (2008)","DOI":"10.1109\/ICASSP.2008.4518528"},{"key":"1408_CR46","volume-title":"Multirate Digital Signal Processing","author":"LR Rabiner","year":"1996","unstructured":"L.R. Rabiner, Multirate Digital Signal Processing (Prentice Hall, Upper Saddle River, 1996)"},{"key":"1408_CR47","doi-asserted-by":"crossref","unstructured":"O. R\u00e4s\u00e4nen, U. Laine, T. Altosaar, An improved speech segmentation quality measure: the r-value, in Proceedings of Interspeech (2009)","DOI":"10.21437\/Interspeech.2009-538"},{"key":"1408_CR48","doi-asserted-by":"crossref","unstructured":"O. R\u00e4s\u00e4nen, U. Laine, T. Altosaar, Blind segmentation of speech using non-linear filtering methods. In: Speech Technologies. InTech (2011)","DOI":"10.5772\/16433"},{"issue":"2","key":"1408_CR49","doi-asserted-by":"crossref","first-page":"307","DOI":"10.1121\/1.1910071","volume":"40","author":"DR Reddy","year":"1966","unstructured":"D.R. Reddy, Segmentation of speech sounds. J. Acoust. Soc. Am. 40(2), 307\u2013312 (1966)","journal-title":"J. Acoust. Soc. Am."},{"key":"1408_CR50","unstructured":"J. Reed, C.H. Lee, A study on music genre classification based on universal acoustic models. In: ISMIR, pp. 89\u201394 (2006)"},{"issue":"1","key":"1408_CR51","doi-asserted-by":"crossref","first-page":"016110","DOI":"10.1103\/PhysRevE.74.016110","volume":"74","author":"J Reichardt","year":"2006","unstructured":"J. Reichardt, S. Bornholdt, Statistical mechanics of community detection. Phys. Rev. E 74(1), 016110 (2006)","journal-title":"Phys. Rev. E"},{"issue":"1","key":"1408_CR52","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1109\/TASSP.1978.1163055","volume":"26","author":"H Sakoe","year":"1978","unstructured":"H. Sakoe, S. Chiba, Dynamic programming algorithm optimization for spoken word recognition. IEEE Trans. Acoust. Speech Signal Process. 26(1), 43\u201349 (1978)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"1408_CR53","unstructured":"A. Sarkar, T.V. Sreenivas, Automatic speech segmentation using average level crossing rate information. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (2005)"},{"issue":"1","key":"1408_CR54","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1016\/j.cosrev.2007.05.001","volume":"1","author":"SE Schaeffer","year":"2007","unstructured":"S.E. Schaeffer, Graph clustering. Comput. Sci. Rev. 1(1), 27\u201364 (2007)","journal-title":"Comput. Sci. Rev."},{"issue":"2","key":"1408_CR55","doi-asserted-by":"crossref","first-page":"1084","DOI":"10.1121\/1.3277194","volume":"127","author":"O Scharenborg","year":"2010","unstructured":"O. Scharenborg, V. Wan, M. Ernestus, Unsupervised speech segmentation: an analysis of the hypothesized phone boundaries. J. Acoust. Soc. Am. 127(2), 1084\u20131095 (2010)","journal-title":"J. Acoust. Soc. Am."},{"key":"1408_CR56","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4057.001.0001","volume-title":"Kernel Methods in Computational Biology","author":"B Sch\u00f6lkopf","year":"2004","unstructured":"B. Sch\u00f6lkopf, K. Tsuda, J.P. Vert, Kernel Methods in Computational Biology (MIT Press, Cambridge, 2004)"},{"key":"1408_CR57","doi-asserted-by":"crossref","unstructured":"M.H. Siu, H. Gish, A. Chan, W. Belfield, Improved topic classification and keyword discovery using an hmm-based speech recognizer trained without supervision, in Proceedings of Interspeech (2010)","DOI":"10.21437\/Interspeech.2010-302"},{"issue":"1","key":"1408_CR58","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1016\/j.csl.2013.05.002","volume":"28","author":"MH Siu","year":"2014","unstructured":"M.H. Siu, H. Gish, A. Chan, W. Belfield, S. Lowe, Unsupervised training of an HMM-based self-organizing unit recognizer with applications to topic classification and keyword discovery. Comput. Speech Lang. 28(1), 210\u2013223 (2014)","journal-title":"Comput. Speech Lang."},{"key":"1408_CR59","doi-asserted-by":"crossref","unstructured":"D. Snyder, P. Ghahremani, D. Povey, D. Garcia-Romero, Y. Carmiel, S. Khudanpur, Deep neural network-based speaker embeddings for end-to-end speaker verification. In: IEEE Spoken Language Technology Workshop (SLT), pp. 165\u2013170 (2016)","DOI":"10.1109\/SLT.2016.7846260"},{"key":"1408_CR60","doi-asserted-by":"crossref","unstructured":"A. Stan, C. Valentini-Botinhao, B. Orza, M. Giurgiu, Blind speech segmentation using spectrogram image-based features and mel cepstral coefficients. In: IEEE Spoken Language Technology Workshop (SLT), pp. 597\u2013602 (2016)","DOI":"10.1109\/SLT.2016.7846324"},{"key":"1408_CR61","doi-asserted-by":"crossref","unstructured":"R. Thiolliere, E. Dunbar, G. Synnaeve, M. Versteegh, E. Dupoux, A hybrid dynamic time warping-deep neural network architecture for unsupervised acoustic modeling, in Proceedings of Interspeech, pp. 3179\u20133183 (2015)","DOI":"10.21437\/Interspeech.2015-640"},{"issue":"7","key":"1408_CR62","doi-asserted-by":"crossref","first-page":"1335","DOI":"10.1109\/TASL.2009.2016731","volume":"17","author":"R Tong","year":"2009","unstructured":"R. Tong, B. Ma, H. Li, E.S. Chng, A target-oriented phonotactic front-end for spoken language recognition. IEEE Trans. Audio Speech Lang. Process. 17(7), 1335\u20131347 (2009)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"1408_CR63","unstructured":"R. Tong, B. Ma, D. Zhu, H. Li, E.S. Chng, Integrating acoustic, prosodic and phonotactic features for spoken language identification. In: IEEE International Conference on Acoustics, Speech and Signal Processing, vol.\u00a01, pp. I\u2013I (2006)"},{"key":"1408_CR64","doi-asserted-by":"crossref","unstructured":"K. Vijayan, H. Li, H. Sun, K.A. Lee, On the importance of analytic phase of speech signals in spoken language recognition. In: IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 5194\u20135198 (2018)","DOI":"10.1109\/ICASSP.2018.8461501"},{"key":"1408_CR65","unstructured":"V. Vuuren, L. Bosch, T. Niesler, Unconstrained speech segmentation using deep neural networks. In: Proceedings of the International Conference on Pattern Recognition Applications and Methods, vol. 1, pp. 248\u2013254 (2015)"},{"issue":"2","key":"1408_CR66","doi-asserted-by":"crossref","first-page":"264","DOI":"10.1109\/TASLP.2014.2387382","volume":"23","author":"H Wang","year":"2015","unstructured":"H. Wang, T. Lee, C.C. Leung, B. Ma, H. Li, Acoustic segment modeling with spectral clustering methods. IEEE\/ACM Trans. Audio Speech Lang. Process. 23(2), 264\u2013277 (2015)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"1408_CR67","doi-asserted-by":"crossref","unstructured":"H. Wang, C.C. Leung, T. Lee, B. Ma, H. Li, An acoustic segment modeling approach to query-by-example spoken term detection. In: IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 5157\u20135160 (2012)","DOI":"10.1109\/ICASSP.2012.6289081"},{"key":"1408_CR68","unstructured":"Y. Yan, E. Barnard, An approach to automatic language identification based on language-dependent phone recognition. In: IEEE International Conference on Acoustics, Speech and Signal Processing, vol. 5, pp. 3511\u20133514 (1995)"},{"key":"1408_CR69","doi-asserted-by":"crossref","first-page":"47","DOI":"10.1016\/j.csl.2016.02.001","volume":"39","author":"W Zhang","year":"2016","unstructured":"W. Zhang, R.A. Clark, Y. Wang, W. Li, Unsupervised language identification based on latent Dirichlet allocation. Comput. Speech Lang. 39, 47\u201366 (2016)","journal-title":"Comput. Speech Lang."},{"issue":"1","key":"1408_CR70","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1109\/TSA.1996.481450","volume":"4","author":"MA Zissman","year":"1996","unstructured":"M.A. Zissman, Comparison of four approaches to automatic language identification of telephone speech. IEEE Trans. Speech Audio Process. 4(1), 31 (1996)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"1408_CR71","unstructured":"M.A. Zissman, E. Singer, Automatic language identification of telephone speech messages using phoneme recognition and n-gram modeling. In: IEEE International Conference on Acoustics, Speech and Signal Processing, vol.\u00a01, pp. I\u2013305 (1994)"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-020-01408-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-020-01408-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-020-01408-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T12:32:52Z","timestamp":1666441972000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-020-01408-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4,28]]},"references-count":71,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2020,10]]}},"alternative-id":["1408"],"URL":"https:\/\/doi.org\/10.1007\/s00034-020-01408-8","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,4,28]]},"assertion":[{"value":"26 December 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 March 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 March 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 April 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}