{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,17]],"date-time":"2025-05-17T01:57:49Z","timestamp":1747447069009},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2021,6,14]],"date-time":"2021-06-14T00:00:00Z","timestamp":1623628800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,6,14]],"date-time":"2021-06-14T00:00:00Z","timestamp":1623628800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1007\/s00034-021-01747-0","type":"journal-article","created":{"date-parts":[[2021,6,14]],"date-time":"2021-06-14T20:02:56Z","timestamp":1623700976000},"page":"6016-6034","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Speaker Verification from Codec-Distorted Speech Through Combination of Affine Transform and Feature Switching"],"prefix":"10.1007","volume":"40","author":[{"given":"M. S.","family":"Athulya","sequence":"first","affiliation":[]},{"given":"P. S.","family":"Sathidevi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,14]]},"reference":[{"issue":"1\u20133","key":"1747_CR1","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.forsciint.2008.11.018","volume":"185","author":"K Amino","year":"2009","unstructured":"K. Amino, T. Arai, Speaker-dependent characteristics of the nasals. Forensic Sci. Int. 185(1\u20133), 21\u201328 (2009)","journal-title":"Forensic Sci. Int."},{"key":"1747_CR2","doi-asserted-by":"crossref","unstructured":"T. Asha, M. Saranya, D.K. Pandia, S. Madikeri, H.A. Murthy, Feature switching in the i-vector framework for speaker verification, in Fifteenth Annual Conference of the International Speech Communication Association (2014)","DOI":"10.21437\/Interspeech.2014-288"},{"key":"1747_CR3","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1016\/j.diin.2018.03.005","volume":"25","author":"MS Athulya","year":"2018","unstructured":"M.S. Athulya, P.S. Sathidevi, Speaker verification from codec distorted speech for forensic investigation through serial combination of classifiers. Digit. Investig. 25, 70\u201377 (2018)","journal-title":"Digit. Investig."},{"key":"1747_CR4","doi-asserted-by":"crossref","unstructured":"L. Besacier, S. Grassi, A. Dufaux, M. Ansorge, F. Pellandini. GSM speech coding and speaker recognition, in 2000 IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings (Cat. No. 00CH37100), vol.\u00a02, pp. II1085\u2013II1088. IEEE (2000)","DOI":"10.1109\/ICASSP.2000.859152"},{"issue":"9","key":"1747_CR5","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1049\/iet-spr.2011.0270","volume":"6","author":"O B\u00fcy\u00fck","year":"2012","unstructured":"O. B\u00fcy\u00fck, L.M. Arslan, Combining log-spectral mean subtraction at different frequency resolutions for handset-channel compensation in single utterance speaker verification. IET Signal Proc. 6(9), 824\u2013828 (2012)","journal-title":"IET Signal Proc."},{"key":"1747_CR6","doi-asserted-by":"crossref","unstructured":"J.K. Chaitanya, R. Janakiraman, H.A. Murthy, Kl divergence based feature switching in the linguistic search space for automatic speech recognition, in 2010 National Conference On Communications (NCC), pp. 1\u20135. IEEE (2010)","DOI":"10.1109\/NCC.2010.5430186"},{"key":"1747_CR7","doi-asserted-by":"crossref","unstructured":"Q. Dan, Y. Honggang, T. Hui, W. Bingxi, Two schemes for automatic speaker recognition over voip, in 2008 IEEE Pacific-Asia Workshop on Computational Intelligence and Industrial Application, vol.\u00a02, pp. 695\u2013699. IEEE (2008)","DOI":"10.1109\/PACIIA.2008.224"},{"issue":"4","key":"1747_CR8","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"S. Davis, P. Mermelstein, Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans. Acoust. Speech Signal Process. 28(4), 357\u2013366 (1980)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"1747_CR9","doi-asserted-by":"crossref","unstructured":"M. Debyeche, A. Krobba, A. Amrouche, Effect of GSM speech coding on the performance of speaker recognition system, in 10th International Conference on Information Science, Signal Processing and their Applications (ISSPA 2010), pp. 137\u2013140. IEEE (2010)","DOI":"10.1109\/ISSPA.2010.5605487"},{"key":"1747_CR10","doi-asserted-by":"crossref","unstructured":"R. Dunn, T. Quatieri, D. Reynolds, J. Campbell, Speaker recognition from coded speech and the effects of score normalization, in Conference Record of Thirty-Fifth Asilomar Conference on Signals, Systems and Computers (Cat. No. 01CH37256), vol.\u00a02, pp. 1562\u20131567. IEEE (2001)","DOI":"10.1109\/ACSSC.2001.987749"},{"key":"1747_CR11","doi-asserted-by":"crossref","unstructured":"W. Eric, M.W. Mak, S.Y. Kung, Speaker verification from coded telephone speech using stochastic feature transformation and handset identification, in Pacific-Rim Conference on Multimedia, pp. 598\u2013606. Springer (2002)","DOI":"10.1007\/3-540-36228-2_74"},{"key":"1747_CR12","doi-asserted-by":"crossref","unstructured":"W. Fakhr, A. AbdelSalam, N. Hamdy, Enhancement of mismatched conditions in speaker recognition for multimedia applications, in 2004 IEEE International Conference on Acoustics, Speech, and Signal Processing, vol.\u00a01, pp. I\u2013377. IEEE (2004)","DOI":"10.1109\/ICASSP.2004.1326001"},{"key":"1747_CR13","unstructured":"J.S. Garofolo, Timit acoustic-phonetic continuous speech corpus. https:\/\/catalog.ldc.upenn.edu\/LDC93S1\/. Accessed 05 July 2018"},{"key":"1747_CR14","unstructured":"J.S. Garofolo, Timit acoustic phonetic continuous speech corpus. Linguistic Data Consortium, 1993 (1993)"},{"key":"1747_CR15","unstructured":"S. Grassi, L. Besacier, A. Dufaux, M. Ansorge, F. Pellandini, Influence of GSM speech coding on the performance of text-independent speaker recognition, in 2000 10th European Signal Processing Conference, pp. 1\u20134. IEEE (2000)"},{"key":"1747_CR16","unstructured":"B.J. Guillemin, C.I. Watson, Impact of the GSM AMR speech codec on formant information important to forensic speaker identification, in Proceedings of the 11th Australian International Conference on Speech Science & Technology, pp. 483\u2013488 (2006)"},{"issue":"11\u201313","key":"1747_CR17","first-page":"1307","volume":"18","author":"P Henderson","year":"1997","unstructured":"P. Henderson, Sammon mapping. Pattern Recognit. Lett. 18(11\u201313), 1307\u20131316 (1997)","journal-title":"Pattern Recognit. Lett."},{"key":"1747_CR18","doi-asserted-by":"crossref","unstructured":"M.E. Houle, H.P. Kriegel, P. Kr\u00f6ger, E. Schubert, A. Zimek, Can shared-neighbor distances defeat the curse of dimensionality? in International Conference on Scientific and Statistical Database Management, pp. 482\u2013500. Springer (2010)","DOI":"10.1007\/978-3-642-13818-8_34"},{"key":"1747_CR19","doi-asserted-by":"crossref","unstructured":"M. Hunt, M. Lennig, P. Mermelstein, Experiments in syllable-based recognition of continuous speech, in ICASSP\u201980. IEEE International Conference on Acoustics, Speech, and Signal Processing, vol.\u00a05, pp. 880\u2013883. Citeseer (1980)","DOI":"10.1109\/ICASSP.1980.1170934"},{"key":"1747_CR20","doi-asserted-by":"crossref","unstructured":"E.T. Imen, A.A. Imen, M. Debyeche, Framework for VOIP speech database generation and a comparaison of different features extraction methodes for speaker identification on VOIP, in 2015 3rd International Conference on Control, Engineering & Information Technology (CEIT), pp. 1\u20135. IEEE (2015)","DOI":"10.1109\/CEIT.2015.7233101"},{"issue":"4","key":"1747_CR21","doi-asserted-by":"publisher","first-page":"276","DOI":"10.1049\/iet-bmt.2016.0119","volume":"6","author":"R Jarina","year":"2017","unstructured":"R. Jarina, J. Polack\u1ef3, P. Po\u010dta, M. Chmul\u00edk, Automatic speaker verification on narrowband and wideband lossy coded clean speech. IET Biometrics 6(4), 276\u2013281 (2017)","journal-title":"IET Biometrics"},{"key":"1747_CR22","doi-asserted-by":"crossref","unstructured":"T. Jiang, B. Gao, J. Han, Speaker identification and verification from audio coded speech in matched and mismatched conditions, in 2009 IEEE International Conference on Robotics and Biomimetics (ROBIO), pp. 2199\u20132204. IEEE (2009)","DOI":"10.1109\/ROBIO.2009.5420478"},{"issue":"7","key":"1747_CR23","doi-asserted-by":"publisher","first-page":"1315","DOI":"10.1109\/TASLP.2016.2545928","volume":"24","author":"C Kim","year":"2016","unstructured":"C. Kim, R.M. Stern, Power-normalized cepstral coefficients (PNCC) for robust speech recognition. IEEE\/ACM Trans. Audio Speech Lang. Process. (TASLP) 24(7), 1315\u20131329 (2016)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process. (TASLP)"},{"key":"1747_CR24","unstructured":"Lawrence, R. Fundamentals of Speech Recognition. Pearson Education India (2008)"},{"key":"1747_CR25","unstructured":"R. Mammone, X. Zhang: Robust speech processing with affine transform replicated data (2000). US Patent 6,038,528"},{"issue":"5","key":"1747_CR26","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1109\/79.536825","volume":"13","author":"RJ Mammone","year":"1996","unstructured":"R.J. Mammone, X. Zhang, R.P. Ramachandran, Robust speaker recognition: a feature-based approach. IEEE Signal Process. Mag. 13(5), 58 (1996)","journal-title":"IEEE Signal Process. Mag."},{"key":"1747_CR27","doi-asserted-by":"crossref","unstructured":"R.W. Mudrowsky, R.P. Ramachandran, S.S. Shetty, The affine transform and feature fusion for robust speaker identification in the presence of speech coding distortion, in 2010 IEEE Asia Pacific Conference on Circuits and Systems, pp. 1063\u20131066. IEEE (2010)","DOI":"10.1109\/APCCAS.2010.5774905"},{"key":"1747_CR28","unstructured":"A. Nagrani, J.S. Chung, A. Zisserman, The voxceleb1 dataset. http:\/\/www.robots.ox.ac.uk\/~vgg\/data\/voxceleb\/vox1.html. Accessed 05 July 2020"},{"key":"1747_CR29","doi-asserted-by":"crossref","unstructured":"A. Nagrani, J.S. Chung, A. Zisserman, Voxceleb: a large-scale speaker identification dataset. In: INTERSPEECH (2017)","DOI":"10.21437\/Interspeech.2017-950"},{"key":"1747_CR30","doi-asserted-by":"crossref","unstructured":"N. Nandan, G. Saha, On the performance of IP and mobile based automatic speaker verification, in 2012 National Conference on Communications (NCC), pp. 1\u20135. IEEE (2012)","DOI":"10.1109\/NCC.2012.6176750"},{"key":"1747_CR31","unstructured":"R. Padmanabhan, R.M. Hegde, H.A. Murthy, Dynamic selection of magnitude and phase based acoustic feature streams for speaker verification, in 2009 17th European Signal Processing Conference, pp. 1244\u20131248. IEEE (2009)"},{"key":"1747_CR32","doi-asserted-by":"crossref","unstructured":"R. Padmanabhan, H.A. Murthy, Acoustic feature diversity and speaker verification, in Eleventh Annual Conference of the International Speech Communication Association (2010)","DOI":"10.21437\/Interspeech.2010-157"},{"key":"1747_CR33","doi-asserted-by":"crossref","unstructured":"M. Petracca, A. Servetti, J. De\u00a0Martin, Performance analysis of compressed-domain automatic speaker recognition as a function of speech coding technique and bit rate, in 2006 IEEE International Conference on Multimedia and Expo, pp. 1393\u20131396. IEEE (2006)","DOI":"10.1109\/ICME.2006.262799"},{"key":"1747_CR34","doi-asserted-by":"crossref","unstructured":"M. Phythian, J. Ingram, S. Sridharan, Effects of speech coding on text-dependent speaker recognition, in TENCON\u201997 Brisbane-Australia. Proceedings of IEEE TENCON\u201997. IEEE Region 10 Annual Conference. Speech and Image Technologies for Computing and Telecommunications (Cat. No. 97CH36162), vol.\u00a01, pp. 137\u2013140. IEEE (1997)","DOI":"10.1109\/TENCON.1997.647276"},{"key":"1747_CR35","doi-asserted-by":"crossref","unstructured":"J. Polacky, R. Jarina, M. Chmulik, Assessment of automatic speaker verification on lossy transcoded speech, in 2016 4th International Conference on Biometrics and Forensics (IWBF), pp. 1\u20136. IEEE (2016)","DOI":"10.1109\/IWBF.2016.7449679"},{"issue":"1","key":"1747_CR36","first-page":"23","volume":"18","author":"J Polacky","year":"2016","unstructured":"J. Polacky, P. Pocta, R. Jarina, An impact of narrowband speech codec mismatch on a performance of GMM-UBM speaker recognition over telecommunication channel. Commun. Sci. Lett. Univ. Zilina 18(1), 23\u201328 (2016)","journal-title":"Commun. Sci. Lett. Univ. Zilina"},{"key":"1747_CR37","doi-asserted-by":"crossref","unstructured":"J. Polacky, P. Pocta, R. Jarina, An impact of wideband speech codec mismatch on a performance of GMM-UBM speaker verification over telecommunication channel, in 2016 ELEKTRO, pp. 77\u201382. IEEE (2016)","DOI":"10.1109\/ELEKTRO.2016.7512039"},{"key":"1747_CR38","doi-asserted-by":"crossref","unstructured":"T.F. Quatieri, E. Singer, R.B. Dunn, D.A. Reynolds, J.P. Campbell, Speaker and Language Recognition Using Speech Codec Parameters. Tech. rep, Massachusetts Inst of Tech Lexington Lincoln Lab (1999)","DOI":"10.21236\/ADA526525"},{"issue":"1\u20133","key":"1747_CR39","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"D.A. Reynolds, T.F. Quatieri, R.B. Dunn, Speaker verification using adapted gaussian mixture models. Digit. Signal Proc. 10(1\u20133), 19\u201341 (2000)","journal-title":"Digit. Signal Proc."},{"issue":"1","key":"1747_CR40","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1109\/89.365379","volume":"3","author":"DA Reynolds","year":"1995","unstructured":"D.A. Reynolds, R.C. Rose, Robust text-independent speaker identification using gaussian mixture speaker models. IEEE Trans. Speech Audio Process. 3(1), 72\u201383 (1995)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"1747_CR41","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/j.specom.2017.08.004","volume":"93","author":"M Saranya","year":"2017","unstructured":"M. Saranya, R. Padmanabhan, H.A. Murthy, Feature-switching: Dynamic feature selection for an i-vector based speaker verification system. Speech Commun. 93, 53\u201362 (2017)","journal-title":"Speech Commun."},{"key":"1747_CR42","unstructured":"J. Silovsky, P. Cerva, J. Zdansky, Assessment of speaker recognition on lossy codecs used for transmission of speech, in Proceedings ELMAR-2011, pp. 205\u2013208. IEEE (2011)"},{"key":"1747_CR43","doi-asserted-by":"crossref","unstructured":"D. Snyder, D. Garcia-Romero, D. Povey, S. Khudanpur, Deep neural network embeddings for text-independent speaker verification, in Interspeech, pp. 999\u20131003 (2017)","DOI":"10.21437\/Interspeech.2017-620"},{"key":"1747_CR44","doi-asserted-by":"crossref","unstructured":"D. Snyder, D. Garcia-Romero, G. Sell, D. Povey, S. Khudanpur, X-vectors: Robust DNN embeddings for speaker recognition, in 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5329\u20135333. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"1747_CR45","doi-asserted-by":"crossref","unstructured":"D. Snyder, P. Ghahremani, D. Povey, D. Garcia-Romero, Y. Carmiel, S. Khudanpur, Deep neural network-based speaker embeddings for end-to-end speaker verification, in 2016 IEEE Spoken Language Technology Workshop (SLT), pp. 165\u2013170. IEEE (2016)","DOI":"10.1109\/SLT.2016.7846260"},{"key":"1747_CR46","doi-asserted-by":"crossref","unstructured":"A. Stauffer, A.D. Lawson, Speaker Recognition on Lossy Compressed Speech Using the Speex Codec Tech. rep, Research Associates for Defense Conversion (RADC) Marcy NY (2009)","DOI":"10.21437\/Interspeech.2009-399"},{"key":"1747_CR47","doi-asserted-by":"crossref","unstructured":"A.K. Vuppala, K.S. Rao, S. Chakrabarti, Effect of speech coding on speaker identification, in 2010 Annual IEEE India Conference (INDICON), pp. 1\u20134. IEEE (2010)","DOI":"10.1109\/INDCON.2010.5712604"},{"key":"1747_CR48","doi-asserted-by":"crossref","unstructured":"N. Wang, L. Wang, Robust speaker recognition based on multi-stream features, in 2016 IEEE International Conference on Consumer Electronics-China (ICCE-China), pp. 1\u20134. IEEE (2016)","DOI":"10.1109\/ICCE-China.2016.7849770"},{"key":"1747_CR49","doi-asserted-by":"crossref","unstructured":"X. Wang, J. Lin, Applying speaker recognition on VOIP auditing, in 2007 International Conference on Machine Learning and Cybernetics, vol.\u00a06, pp. 3577\u20133581. IEEE (2007)","DOI":"10.1109\/ICMLC.2007.4370767"},{"key":"1747_CR50","unstructured":"D. Yessad, A. Amrouche, Fusion strategies for distributed speaker recognition using residual signal based g729 resynthesized speech, in Proceedings of the 16th International Conference on Information Fusion, pp. 432\u2013437. IEEE (2013)"},{"key":"1747_CR51","unstructured":"E.W. Yu, M.W. Mak, C.H. Sit, S.Y. Kung: Speaker verification based on g. 729 and g. 723.1 coder parameters and handset mismatch compensation, in Eighth European Conference on Speech Communication and Technology (2003)"},{"issue":"4","key":"1747_CR52","doi-asserted-by":"publisher","first-page":"351","DOI":"10.1016\/0167-6393(90)90010-7","volume":"9","author":"V Zue","year":"1990","unstructured":"V. Zue, S. Seneff, J. Glass, Speech database development at MIT: timit and beyond. Speech Commun. 9(4), 351\u2013356 (1990)","journal-title":"Speech Commun."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-021-01747-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-021-01747-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-021-01747-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T20:08:00Z","timestamp":1725221280000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-021-01747-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,14]]},"references-count":52,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2021,12]]}},"alternative-id":["1747"],"URL":"https:\/\/doi.org\/10.1007\/s00034-021-01747-0","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"type":"print","value":"0278-081X"},{"type":"electronic","value":"1531-5878"}],"subject":[],"published":{"date-parts":[[2021,6,14]]},"assertion":[{"value":"3 February 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 May 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 May 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 June 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}