{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T11:02:23Z","timestamp":1740135743317,"version":"3.37.3"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2023,4,11]],"date-time":"2023-04-11T00:00:00Z","timestamp":1681171200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,4,11]],"date-time":"2023-04-11T00:00:00Z","timestamp":1681171200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1007\/s00034-023-02360-z","type":"journal-article","created":{"date-parts":[[2023,4,11]],"date-time":"2023-04-11T07:04:41Z","timestamp":1681196681000},"page":"5412-5427","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Relative Significance of Speech Sounds in Speaker Verification Systems"],"prefix":"10.1007","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4964-3328","authenticated-orcid":false,"given":"B. Shaik Mohammad","family":"Rafi","sequence":"first","affiliation":[]},{"given":"Sreekanth","family":"Sankala","sequence":"additional","affiliation":[]},{"given":"K. Sri Rama","family":"Murty","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,4,11]]},"reference":[{"key":"2360_CR1","doi-asserted-by":"crossref","unstructured":"K. Amino, T. Arai, Perceptual speaker identification using monosyllabic stimuli-effects of the nucleus vowels and speaker characteristics contained in nasals. In Ninth Annual Conference of the International Speech Communication Association (2008)","DOI":"10.21437\/Interspeech.2008-507"},{"key":"2360_CR2","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1250\/ast.27.233","volume":"27","author":"K Amino","year":"2006","unstructured":"K. Amino, T. Sugawara, T. Arai, Idiosyncrasy of nasal sounds in human speaker identification and their acoustic properties. Acoust. Sci. Technol. 27, 233\u2013235 (2006). https:\/\/doi.org\/10.1250\/ast.27.233","journal-title":"Acoust. Sci. Technol."},{"key":"2360_CR3","doi-asserted-by":"publisher","unstructured":"S. Bhati, S. Nayak, K. Murty, Unsupervised speech signal to symbol transformation for zero resource speech applications (2017), pp. 2133\u20132137. https:\/\/doi.org\/10.21437\/Interspeech.2017-1476","DOI":"10.21437\/Interspeech.2017-1476"},{"issue":"6","key":"2360_CR4","doi-asserted-by":"publisher","first-page":"1441","DOI":"10.1121\/1.1910246","volume":"40","author":"PD Bricker","year":"1966","unstructured":"P.D. Bricker, S. Pruzansky, Effects of stimulus content and duration on talker identification. J. Acoust. Soc. Am. 40(6), 1441\u20131449 (1966)","journal-title":"J. Acoust. Soc. Am."},{"key":"2360_CR5","unstructured":"T. Chen, S. Kornblith, M. Norouzi, G. Hinton, A simple framework for contrastive learning of visual representations. In International Conference on Machine Learning (PMLR, 2020), pp. 1597\u20131607"},{"issue":"12","key":"2360_CR6","doi-asserted-by":"publisher","first-page":"2041","DOI":"10.1109\/TASLP.2019.2938863","volume":"27","author":"J Chorowski","year":"2019","unstructured":"J. Chorowski, R.J. Weiss, S. Bengio, A. van den Oord, Unsupervised speech representation learning using wavenet autoencoders. IEEE\/ACM Trans. Audio Speech Lang. Process. 27(12), 2041\u20132053 (2019)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"issue":"4","key":"2360_CR7","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2010","unstructured":"N. Dehak, P.J. Kenny, R. Dehak, P. Dumouchel, P. Ouellet, Front-end factor analysis for speaker verification. IEEE Trans. Audio Speech Lang. Process. 19(4), 788\u2013798 (2010)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2360_CR8","doi-asserted-by":"publisher","unstructured":"C.T. Do, C. Barras, V.B. Le, A.K. Sarkar, Augmenting short-term cepstral features with long-term discriminative features for speaker verification of telephone data. In Proceedings of the Interspeech 2013 (2013), pp. 2484\u20132488. https:\/\/doi.org\/10.21437\/Interspeech.2013-415","DOI":"10.21437\/Interspeech.2013-415"},{"key":"2360_CR9","doi-asserted-by":"crossref","unstructured":"J.P. Eatock, J.S. Mason, A quantitative assessment of the relative speaker discriminating properties of phonemes. In Proceedings of ICASSP\u201994. IEEE International Conference on Acoustics, Speech and Signal Processing (IEEE, 1994), vol.\u00a01, pp. I\u2013133","DOI":"10.1109\/ICASSP.1994.389337"},{"key":"2360_CR10","doi-asserted-by":"crossref","unstructured":"J.P. Eatock, J.S.D. Mason, Phoneme performance in speaker recognition. In Proceedings of the 2nd International Conference on Spoken Language Processing (ICSLP 1992) (1992), pp. 1411\u20131414","DOI":"10.21437\/ICSLP.1992-377"},{"key":"2360_CR11","unstructured":"J.S. Garofolo, Timit acoustic phonetic continuous speech corpus. Linguistic Data Consortium, 1993 (1993)"},{"key":"2360_CR12","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1016\/j.csl.2017.04.008","volume":"46","author":"H Kamper","year":"2017","unstructured":"H. Kamper, A. Jansen, S. Goldwater, A segmental framework for fully-unsupervised large-vocabulary speech recognition. Comput. Speech Lang. 46, 154\u2013174 (2017). https:\/\/doi.org\/10.1016\/j.csl.2017.04.008","journal-title":"Comput. Speech Lang."},{"key":"2360_CR13","doi-asserted-by":"publisher","first-page":"1476","DOI":"10.1038\/1961253a0","volume":"196","author":"L Kersta","year":"1962","unstructured":"L. Kersta, Voiceprint identification. Nature 196, 1476\u20134687 (1962)","journal-title":"Nature"},{"issue":"1","key":"2360_CR14","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1016\/j.specom.2009.08.009","volume":"52","author":"T Kinnunen","year":"2010","unstructured":"T. Kinnunen, H. Li, An overview of text-independent speaker recognition: from features to supervectors. Speech Commun. 52(1), 12\u201340 (2010)","journal-title":"Speech Commun."},{"key":"2360_CR15","doi-asserted-by":"publisher","first-page":"EL100","DOI":"10.1121\/1.3204765","volume":"126","author":"BJ Lee","year":"2009","unstructured":"B.J. Lee, J.Y. Choi, H.G. Kang, Phonetically optimized speaker modeling for robust speaker recognition. J. Acoust. Soc. Am. 126, EL100\u2013EL106 (2009). https:\/\/doi.org\/10.1121\/1.3204765","journal-title":"J. Acoust. Soc. Am."},{"key":"2360_CR16","doi-asserted-by":"publisher","unstructured":"Y. Lei, N. Scheffer, L. Ferrer, M. McLaren, A novel scheme for speaker recognition using a phonetically-aware deep neural network. In 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2014), pp. 1695\u20131699. https:\/\/doi.org\/10.1109\/ICASSP.2014.6853887","DOI":"10.1109\/ICASSP.2014.6853887"},{"key":"2360_CR17","doi-asserted-by":"crossref","unstructured":"S. Ling, J. Salazar, Y. Liu, K. Kirchhoff, A. Amazon, Bertphone: phonetically-aware encoder representations for utterance-level speaker and language recognition. In Proceeding of the Odyssey (2020), pp. 9\u201316","DOI":"10.21437\/Odyssey.2020-2"},{"key":"2360_CR18","doi-asserted-by":"crossref","unstructured":"Y. Liu, L. He, J. Liu, M.T. Johnson, Speaker embedding extraction with phonetic information. arXiv preprint arXiv:1804.04862 (2018)","DOI":"10.21437\/Interspeech.2018-1226"},{"key":"2360_CR19","doi-asserted-by":"crossref","unstructured":"A. Lozano-Diez, A. Silnova, P. Matejka, O. Glembek, O. Plchot, J. Pesan, L. Burget, J. Gonzalez-Rodriguez, Analysis and optimization of bottleneck features for speaker recognition. In Odyssey vol 2016 (2016), pp. 352\u2013357","DOI":"10.21437\/Odyssey.2016-51"},{"key":"2360_CR20","first-page":"257","volume":"1","author":"SJ Luck","year":"1998","unstructured":"S.J. Luck, Neurophysiology of selective attention. Attention 1, 257\u2013295 (1998)","journal-title":"Attention"},{"key":"2360_CR21","doi-asserted-by":"publisher","unstructured":"M. McLaren, Y. Lei, L. Ferrer, Advances in deep neural network approaches to speaker recognition. In 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2015), pp. 4814\u20134818. https:\/\/doi.org\/10.1109\/ICASSP.2015.7178885","DOI":"10.1109\/ICASSP.2015.7178885"},{"key":"2360_CR22","unstructured":"V. Mnih, N. Heess, A. Graves, et\u00a0al. Recurrent models of visual attention. In Advances in Neural Information Processing Systems, vol 27 (2014)"},{"key":"2360_CR23","unstructured":"F. Nolan, The phonetic bases of speaker recognition. Ph.D. thesis, University of Cambridge (1980)"},{"key":"2360_CR24","doi-asserted-by":"crossref","unstructured":"V. Panayotov, G. Chen, D. Povey, S. Khudanpur, Librispeech: an asr corpus based on public domain audio books. In 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2015), pp. 5206\u20135210","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"2360_CR25","volume-title":"Discrete-Time Speech Signal Processing: Principles and Practice","author":"TF Quatieri","year":"2006","unstructured":"T.F. Quatieri, Discrete-Time Speech Signal Processing: Principles and Practice (Pearson Education India, Bengaluru, 2006)"},{"key":"2360_CR26","doi-asserted-by":"publisher","DOI":"10.1561\/9781601980717","volume-title":"Introduction to Digital Speech Processing","author":"LR Rabiner","year":"2007","unstructured":"L.R. Rabiner, R.W. Schafer, Introduction to Digital Speech Processing, vol. 1 (Now Publishers Inc, Delft, 2007)"},{"key":"2360_CR27","doi-asserted-by":"crossref","unstructured":"S. Sankala, B.S.M. Rafi, S.R.M. Kodukula, Self attentive context dependent speaker embedding for speaker verification. In 2020 National Conference on Communications (NCC) (IEEE, 2020), pp. 1\u20135","DOI":"10.1109\/NCC48643.2020.9056043"},{"key":"2360_CR28","doi-asserted-by":"crossref","unstructured":"S. Schneider, A. Baevski, R. Collobert, M. Auli, wav2vec: unsupervised pre-training for speech recognition. arXiv preprint arXiv:1904.05862 (2019)","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"2360_CR29","unstructured":"R. Shwartz-Ziv, N. Tishby, Opening the black box of deep neural networks via information. arXiv preprint arXiv:1703.00810 (2017)"},{"key":"2360_CR30","doi-asserted-by":"crossref","unstructured":"D. Snyder, D. Garcia-Romero, G. Sell, D. Povey, S. Khudanpur, X-vectors: robust dnn embeddings for speaker recognition. In 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2018), pp. 5329\u20135333","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"2360_CR31","doi-asserted-by":"publisher","unstructured":"S. Sreekanth, S.M. Rafi\u00a0B, K. Sri Rama\u00a0Murty, S. Bhati, Speaker embedding extraction with virtual phonetic information. In 2019 IEEE Global Conference on Signal and Information Processing (GlobalSIP) (2019), pp. 1\u20135. https:\/\/doi.org\/10.1109\/GlobalSIP45357.2019.8969551","DOI":"10.1109\/GlobalSIP45357.2019.8969551"},{"key":"2360_CR32","doi-asserted-by":"publisher","unstructured":"H. Su, S. Wegmann, Factor analysis based speaker verification using asr. In Interspeech 2016 (2016), pp. 2223\u20132227. https:\/\/doi.org\/10.21437\/Interspeech.2016-1157","DOI":"10.21437\/Interspeech.2016-1157"},{"key":"2360_CR33","doi-asserted-by":"crossref","unstructured":"H. van\u00a0den Heuvel, T.C. Rietveld, Speaker related variability in cepstral representations of Dutch speech segments. In ICSLP (1992)","DOI":"10.21437\/ICSLP.1992-393"},{"issue":"3","key":"2360_CR34","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1177\/002383099003300302","volume":"33","author":"WA Van Dommelen","year":"1990","unstructured":"W.A. Van Dommelen, Acoustic parameters in human speaker recognition. Lang. Speech 33(3), 259\u2013272 (1990)","journal-title":"Lang. Speech"},{"key":"2360_CR35","doi-asserted-by":"publisher","unstructured":"E. Variani, X. Lei, E. McDermott, I.L. Moreno, J. Gonzalez-Dominguez, Deep neural networks for small footprint text-dependent speaker verification. In 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2014), pp. 4052\u20134056. https:\/\/doi.org\/10.1109\/ICASSP.2014.6854363","DOI":"10.1109\/ICASSP.2014.6854363"},{"issue":"6","key":"2360_CR36","doi-asserted-by":"publisher","first-page":"1065","DOI":"10.1121\/1.1919153","volume":"36","author":"WD Voiers","year":"1964","unstructured":"W.D. Voiers, Perceptual bases of speaker identity. J. Acoust. Soc. Am. 36(6), 1065\u20131073 (1964)","journal-title":"J. Acoust. Soc. Am."},{"key":"2360_CR37","doi-asserted-by":"publisher","unstructured":"Q. Wang, K. Okabe, K.A. Lee, H. Yamamoto, T. Koshinaka, Attention mechanism in speaker recognition: What does it learn in deep speaker embedding? In 2018 IEEE Spoken Language Technology Workshop (SLT) (2018), pp. 1052\u20131059. https:\/\/doi.org\/10.1109\/SLT.2018.8639586","DOI":"10.1109\/SLT.2018.8639586"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02360-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-023-02360-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02360-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,18]],"date-time":"2024-10-18T01:06:09Z","timestamp":1729213569000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-023-02360-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,11]]},"references-count":37,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2023,9]]}},"alternative-id":["2360"],"URL":"https:\/\/doi.org\/10.1007\/s00034-023-02360-z","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"type":"print","value":"0278-081X"},{"type":"electronic","value":"1531-5878"}],"subject":[],"published":{"date-parts":[[2023,4,11]]},"assertion":[{"value":"3 June 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 March 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 March 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 April 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}