{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:27:52Z","timestamp":1740122872728,"version":"3.37.3"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,11,30]],"date-time":"2017-11-30T00:00:00Z","timestamp":1512000000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001413","name":"Indian Space Research Organisation","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100001413","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2018,9]]},"DOI":"10.1007\/s10772-017-9477-2","type":"journal-article","created":{"date-parts":[[2017,11,30]],"date-time":"2017-11-30T10:00:09Z","timestamp":1512036009000},"page":"473-488","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Improved i-vector extraction technique for speaker verification with short utterances"],"prefix":"10.1007","volume":"21","author":[{"given":"Arnab","family":"Poddar","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Md","family":"Sahidullah","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Goutam","family":"Saha","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,11,30]]},"reference":[{"issue":"2","key":"9477_CR1","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1109\/TASL.2006.881689","volume":"15","author":"P Angkititrakul","year":"2007","unstructured":"Angkititrakul, P., & Hansen, J. H. (2007). Discriminative in-set\/out-of-set speaker recognition. IEEE Transactions on Audio, Speech, and Language Processing, 15(2), 498\u2013508.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"7","key":"9477_CR2","doi-asserted-by":"publisher","first-page":"2072","DOI":"10.1109\/TASL.2007.902870","volume":"15","author":"N Brummer","year":"2007","unstructured":"Brummer, N., Burget, L., Cernocky, H., Glembek, O., Grezl, F., Karafiat, M., et al. (2007). Fusion of heterogeneous speaker recognition systems in the SBTU submission for the NIST speaker recognition evaluation 2006. IEEE Transactions on Audio, Speech, and Language Processing, 15(7), 2072\u20132084.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"doi-asserted-by":"crossref","unstructured":"Cai, W., Li, M., Li, L., & Hong, Q. (2015). Duration dependent covariance regularization in plda modeling for speaker verification. In INTERSPEECH (pp. 1027\u20131031).","key":"9477_CR3","DOI":"10.21437\/Interspeech.2015-278"},{"issue":"5","key":"9477_CR4","doi-asserted-by":"publisher","first-page":"308","DOI":"10.1109\/LSP.2006.870086","volume":"13","author":"WM Campbell","year":"2006","unstructured":"Campbell, W. M., Sturim, D. E., & Reynolds, D. A. (2006a). Support vector machines using GMM supervectors for speaker verification. IEEE Signal Processing Letters, 13(5), 308\u2013311.","journal-title":"IEEE Signal Processing Letters"},{"unstructured":"Campbell, W. M., Sturim, D. E., Reynolds, D. A., & Solomonoff, A. (2006b). SVM based speaker verification using a GMM supervector kernel and NAP variability compensation. In IEEE International Conference on Acoustics, Speech and Signal Processing, (ICASSP), IEEE.","key":"9477_CR5"},{"issue":"9","key":"9477_CR6","doi-asserted-by":"publisher","first-page":"1437","DOI":"10.1109\/5.628714","volume":"85","author":"JP Campbell Jr","year":"1997","unstructured":"Campbell, J. P, Jr. (1997). Speaker recognition: A tutorial. Proceedings of the IEEE, 85(9), 1437\u20131462.","journal-title":"Proceedings of the IEEE"},{"issue":"4","key":"9477_CR7","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"SB Davis","year":"1980","unstructured":"Davis, S. B., & Mermelstein, P. (1980). Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Transactions on Acoustics, Speech and Signal Processing, 28(4), 357\u2013366.","journal-title":"IEEE Transactions on Acoustics, Speech and Signal Processing"},{"issue":"4","key":"9477_CR8","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2011","unstructured":"Dehak, N., Kenny, P., Dehak, R., Dumouchel, P., & Ouellet, P. (2011). Front-end factor analysis for speaker verification. IEEE Transactions on Audio, Speech, and Language Processing, 19(4), 788\u2013798.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9477_CR9","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster, A. P., Laird, N. M., & Rubin, D. B. (1977). Maximum likelihood from incomplete data via the EM algorithm. Journal of the Royal Statistical Society Series B, 39, 1\u201338.","journal-title":"Journal of the Royal Statistical Society Series B"},{"doi-asserted-by":"crossref","unstructured":"Fauve, B. G., Evans, N. W., Pearson, N., Bonastre, J. F., & Mason, J. S. (2007). Influence of task duration in text-independent speaker verification. In Proceedings of INTERSPEECH, ISCA (pp. 794\u2013797).","key":"9477_CR10","DOI":"10.21437\/Interspeech.2007-151"},{"unstructured":"Fauve, B. G., Evans, N. W., & Mason, J. S. (2008). Improving the performance of text-independent short duration SVM-and GMM-based speaker verification. In Odyssey, ISCA (p. 18).","key":"9477_CR11"},{"doi-asserted-by":"crossref","unstructured":"Ferrer, L., Bratt, H., Kajarekar, S., Shriberg, E., S\u00f6nmez, K., Stolcke, A., & Venkataraman, A. (2003). Modeling duration patterns for speaker recognition (pp. 2017\u20132020).","key":"9477_CR12","DOI":"10.21437\/Eurospeech.2003-580"},{"issue":"2","key":"9477_CR13","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1109\/89.279278","volume":"2","author":"JL Gauvain","year":"1994","unstructured":"Gauvain, J. L., & Lee, C. H. (1994). Maximum a posteriori estimation for multivariate Gaussian mixture observations of Markov chains. IEEE Transactions on Speech and Audio Processing, 2(2), 291\u2013298.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"issue":"7","key":"9477_CR14","doi-asserted-by":"publisher","first-page":"1890","DOI":"10.1109\/TASL.2010.2102753","volume":"19","author":"T Hasan","year":"2011","unstructured":"Hasan, T., & Hansen, J. H. (2011). A study on universal background model training in speaker verification. IEEE Transactions on Audio, Speech, and Language Processing, 19(7), 1890\u20131899.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"doi-asserted-by":"crossref","unstructured":"Hasan, T., Saeidi, R., & Hansen, J. H., van Leeuwen, D. (2013). Duration mismatch compensation for i-vector based speaker recognition systems. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE (pp. 7663\u20137667).","key":"9477_CR15","DOI":"10.1109\/ICASSP.2013.6639154"},{"doi-asserted-by":"crossref","unstructured":"Kanagasundaram, A., Vogt, R., Dean, D. B., Sridharan, S., & Mason, M. W. (2011). I-vector based speaker recognition on short utterances. In Proceedings of INTERSPEECH, ISCA (pp. 2341\u20132344).","key":"9477_CR16","DOI":"10.21437\/Interspeech.2011-58"},{"unstructured":"Kanagasundaram, A., Vogt, R. J., Dean, D. B., & Sridharan, S. (2012). PLDA based speaker recognition on short utterances. In The speaker and language recognition workshop (Odyssey) ISCA.","key":"9477_CR17"},{"key":"9477_CR18","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.specom.2014.01.004","volume":"59","author":"A Kanagasundaram","year":"2014","unstructured":"Kanagasundaram, A., Dean, D., Sridharan, S., Gonzalez-Dominguez, J., Gonzalez-Rodriguez, J., & Ramos, D. (2014). Improving short utterance i-vector speaker verification using utterance variance modelling and compensation techniques. Speech Communication, 59, 69\u201382.","journal-title":"Speech Communication"},{"issue":"2","key":"9477_CR19","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1007\/s10772-017-9402-8","volume":"20","author":"A Kanagasundaram","year":"2017","unstructured":"Kanagasundaram, A., Dean, D., Sridharan, S., Ghaemmaghami, H., & Fookes, C. (2017). A study on the effects of using short utterance length development data in the design of gplda speaker verification systems. International Journal of Speech Technology, 20(2), 247\u2013259.","journal-title":"International Journal of Speech Technology"},{"unstructured":"Kenny, P. (2010). Bayesian speaker verification with heavy-tailed priors. In The speaker and language recognition workshop (Odyssey) ISCA, (pp. 14).","key":"9477_CR20"},{"issue":"4","key":"9477_CR21","doi-asserted-by":"publisher","first-page":"1435","DOI":"10.1109\/TASL.2006.881693","volume":"15","author":"P Kenny","year":"2007","unstructured":"Kenny, P., Boulianne, G., Ouellet, P., & Dumouchel, P. (2007). Joint factor analysis versus eigenchannels in speaker recognition. IEEE Transactions on Audio, Speech, and Language Processing, 15(4), 1435\u20131447.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"5","key":"9477_CR22","doi-asserted-by":"publisher","first-page":"980","DOI":"10.1109\/TASL.2008.925147","volume":"16","author":"P Kenny","year":"2008","unstructured":"Kenny, P., Ouellet, P., Dehak, N., Gupta, V., & Dumouchel, P. (2008). A study of interspeaker variability in speaker verification. IEEE Transactions on Audio, Speech, and Language Processing, 16(5), 980\u2013988.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"1","key":"9477_CR23","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1016\/j.specom.2009.08.009","volume":"52","author":"T Kinnunen","year":"2010","unstructured":"Kinnunen, T., & Li, H. (2010). An overview of text-independent speaker recognition: From features to supervectors. Speech Communication, 52(1), 12\u201340.","journal-title":"Speech Communication"},{"issue":"10","key":"9477_CR24","doi-asserted-by":"publisher","first-page":"13,487","DOI":"10.1016\/j.eswa.2011.04.069","volume":"38","author":"P Krishnamoorthy","year":"2011","unstructured":"Krishnamoorthy, P., Jayanna, H., & Prasanna, S. (2011). Speaker recognition under limited data condition by noise addition. Expert Systems with Applications, 38(10), 13,487\u201313,490.","journal-title":"Expert Systems with Applications"},{"issue":"6","key":"9477_CR25","doi-asserted-by":"publisher","first-page":"1129","DOI":"10.1109\/TASLP.2016.2544660","volume":"24","author":"L Li","year":"2016","unstructured":"Li, L., Wang, D., Zhang, C., & Zheng, T. F. (2016a). Improving short utterance speaker recognition by modeling speech unit classes. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP), 24(6), 1129\u20131139.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP)"},{"doi-asserted-by":"crossref","unstructured":"Li, L., Wang, D., Zhang, X., Zheng, T. F., & Jin, P. (2016b). System combination for short utterance speaker recognition. In Signal and Information Processing Association Annual Summit and Conference (APSIPA), Asia-Pacific, IEEE, (pp. 1\u20135).","key":"9477_CR26","DOI":"10.1109\/APSIPA.2016.7820903"},{"issue":"4","key":"9477_CR27","doi-asserted-by":"publisher","first-page":"940","DOI":"10.1016\/j.csl.2014.02.004","volume":"28","author":"M Li","year":"2014","unstructured":"Li, M., & Narayanan, S. (2014). Simplified supervised i-vector modeling with application to robust and efficient language identification and speaker verification. Computer Speech & Language, 28(4), 940\u2013958.","journal-title":"Computer Speech & Language"},{"key":"9477_CR28","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.specom.2016.02.008","volume":"80","author":"W Li","year":"2016","unstructured":"Li, W., Fu, T., You, H., Zhu, J., & Chen, N. (2016c). Feature sparsity analysis for i-vector based speaker verification. Speech Communication, 80, 60\u201370.","journal-title":"Speech Communication"},{"doi-asserted-by":"crossref","unstructured":"Mandasari, M.I., McLaren, M., & van Leeuwen, D. A. (2011). Evaluation of i-vector speaker recognition systems for forensic application. In Proceedings of INTERSPEECH, ISCA (pp. 21\u201324).","key":"9477_CR29","DOI":"10.21437\/Interspeech.2011-6"},{"unstructured":"NIST. (2008). The NIST year 2008 speaker recognition evaluation plan. Technical report, NIST.","key":"9477_CR30"},{"unstructured":"NIST. (2010). The NIST year 2010 speaker recognition evaluation plan. Technical report, NIST.","key":"9477_CR31"},{"doi-asserted-by":"crossref","unstructured":"Poddar, A., Sahidullah, M., & Saha, G. (2015). Performance comparison of speaker recognition systems in presence of duration variability. In Annual IEEE India Conference (INDICON), IEEE (pp. 1\u20136).","key":"9477_CR32","DOI":"10.1109\/INDICON.2015.7443464"},{"unstructured":"Poddar, A., Sahidullah, M., & Saha, G. (2017). An adaptive i-vector extraction for speaker verification with short utterance. In Proc. of International Conference on Pattern Recognition and Machine Intelligence (PReMI 2017), Berlin: Springer.","key":"9477_CR33"},{"doi-asserted-by":"crossref","unstructured":"Poorjam, A. H., Saeidi, R., Kinnunen, T., & Hautam\u00e4ki, V. (2016). Incorporating uncertainty as a quality measure in i-vector based language recognition. Odyssey pp. 74\u201380.","key":"9477_CR34","DOI":"10.21437\/Odyssey.2016-11"},{"issue":"1","key":"9477_CR35","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1109\/89.365379","volume":"3","author":"DA Reynolds","year":"1995","unstructured":"Reynolds, D. A., & Rose, R. C. (1995). Robust text-independent speaker identification using gaussian mixture speaker models. IEEE transactions on speech and audio processing, 3(1), 72\u201383.","journal-title":"IEEE transactions on speech and audio processing"},{"issue":"1","key":"9477_CR36","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"Reynolds, D. A., Quatieri, T. F., & Dunn, R. B. (2000). Speaker verification using adapted Gaussian mixture models. Digital Signal Processing, 10(1), 19\u201341.","journal-title":"Digital Signal Processing"},{"key":"9477_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.dsp.2015.10.011","volume":"50","author":"M Sahidullah","year":"2016","unstructured":"Sahidullah, M., & Kinnunen, T. (2016). Local spectral variability features for speaker verification. Digital Signal Processing, 50, 1\u201311.","journal-title":"Digital Signal Processing"},{"unstructured":"Sahidullah, M., & Saha, G. (2012a). Comparison of speech activity detection techniques for speaker recognition. arXiv preprint arXiv:12100297","key":"9477_CR38"},{"issue":"4","key":"9477_CR39","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1016\/j.specom.2011.11.004","volume":"54","author":"M Sahidullah","year":"2012","unstructured":"Sahidullah, M., & Saha, G. (2012b). Design, analysis and experimental evaluation of block based transformation in MFCC computation for speaker recognition. Speech Communication, 54(4), 543\u2013565.","journal-title":"Speech Communication"},{"issue":"2","key":"9477_CR40","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1109\/LSP.2012.2235067","volume":"20","author":"M Sahidullah","year":"2013","unstructured":"Sahidullah, M., & Saha, G. (2013). A novel windowing technique for efficient computation of MFCC for speaker recognition. IEEE Signal Processing Letters, 20(2), 149\u2013152.","journal-title":"IEEE Signal Processing Letters"},{"doi-asserted-by":"crossref","unstructured":"Sarkar, A. K., Matrouf, D., Bousquet, P. M., & Bonastre, J. F. (2012). Study of the effect of i-vector modeling on short and mismatch utterance duration for speaker verification. In Proceedings of INTERSPEECH ISCA.","key":"9477_CR41","DOI":"10.21437\/Interspeech.2012-347"},{"unstructured":"Shum, S. (2011). Unsupervised methods for speaker diarization. PhD thesis, Massachusetts Institute of Technology.","key":"9477_CR42"},{"issue":"2","key":"9477_CR43","doi-asserted-by":"publisher","first-page":"1515","DOI":"10.1121\/1.3672707","volume":"131","author":"JW Suh","year":"2012","unstructured":"Suh, J. W., & Hansen, J. H. (2012). Acoustic hole filling for sparse enrollment data using a cohort universal corpus for speaker recognition. The Journal of the Acoustical Society of America, 131(2), 1515\u20131528.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"7","key":"9477_CR44","doi-asserted-by":"publisher","first-page":"1118","DOI":"10.1109\/TASLP.2015.2419978","volume":"23","author":"M Segbroeck Van","year":"2015","unstructured":"Van Segbroeck, M., Travadi, R., & Narayanan, S. S. (2015). Rapid language identification. IEEE Transactions on Audio, Speech, and Language Processing, 23(7), 1118\u20131129.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-017-9477-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9477-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9477-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,29]],"date-time":"2024-06-29T07:44:43Z","timestamp":1719647083000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-017-9477-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,11,30]]},"references-count":44,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2018,9]]}},"alternative-id":["9477"],"URL":"https:\/\/doi.org\/10.1007\/s10772-017-9477-2","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2017,11,30]]},"assertion":[{"value":"29 June 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 November 2017","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 November 2017","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}