{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T05:54:40Z","timestamp":1771480480307,"version":"3.50.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2019,6,10]],"date-time":"2019-06-10T00:00:00Z","timestamp":1560124800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2019,6,10]],"date-time":"2019-06-10T00:00:00Z","timestamp":1560124800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61771333"],"award-info":[{"award-number":["61771333"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J AUDIO SPEECH MUSIC PROC."],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1186\/s13636-019-0151-2","type":"journal-article","created":{"date-parts":[[2019,6,10]],"date-time":"2019-06-10T13:03:06Z","timestamp":1560171786000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":23,"title":["Replay attack detection with auditory filter-based relative phase features"],"prefix":"10.1186","volume":"2019","author":[{"given":"Zeyan","family":"Oo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4005-5036","authenticated-orcid":false,"given":"Longbiao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Khomdet","family":"Phapatanaburi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meng","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seiichi","family":"Nakagawa","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masahiro","family":"Iwahashi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianwu","family":"Dang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,6,10]]},"reference":[{"key":"151_CR1","unstructured":"M. Todisco, H. Delgado, N. Evans, in Speaker Odyssey Workshop, Bilbao, Spain. A new feature for automatic speaker verification anti-spoofing: Constant q cepstral coefficients, (2016), pp. 249\u2013252. \n                    http:\/\/www.odyssey2016.org\/papers\/pdfs_stamped\/59.pdf\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR2","unstructured":"W. Rao, M-W. Mak, K-A. Lee, in 2015 IEEE International Conference On Acoustics, Speech and Signal Processing (ICASSP). Normalization of total variability matrix for i-vector\/plda speaker verification, (2015), pp. 4180\u20134184. \n                    https:\/\/ieeexplore.ieee.org\/document\/7178758\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR3","unstructured":"G. Heigold, I. Moreno, S. Bengio, S. Shazeer, in 2016 IEEE International Conference On Acoustics, Speech and Signal Processing (ICASSP). End-to-end text-dependent speaker verification, (2016), pp. 5115\u20135119. \n                    https:\/\/ieeexplore.ieee.org\/abstract\/document\/7472652\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR4","unstructured":"N. W. Evans, T. Kinnunen, J. Yamagishi, in Interspeech. Spoofing and countermeasures for automatic speaker verification, (2013), pp. 925\u2013929. \n                    http:\/\/cs.uef.fi\/sipu\/pub\/IS131294.pdf\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR5","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.specom.2014.10.005","volume":"66","author":"Z. Wu","year":"2015","unstructured":"Z. Wu, N. Evans, T. Kinnunen, J. Yamagishi, F. Alegre, H. Li, Spoofing and countermeasures for speaker verification: A survey. Speech Comm.66:, 130\u2013153 (2015). \n                    https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0167639314000788\n                    \n                  . \n                    https:\/\/doi.org\/10.1016\/j.specom.2014.10.005\n                    \n                  .","journal-title":"Speech Comm."},{"key":"151_CR6","unstructured":"Z. -F. Wabg, G. Wei, Q. -H. H, in 2011 International Conference On Machine Learning and Cybernetics (ICMLC), vol. 4. Channel pattern noise based playback attack detection algorithm for speaker recognition, (2011), pp. 1708\u20131713. \n                    https:\/\/ieeexplore.ieee.org\/document\/6016982\n                    \n                  . Accessed 04 June 2019."},{"issue":"1","key":"151_CR7","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1109\/89.365379","volume":"3","author":"D. A. Reynolds","year":"1995","unstructured":"D. A. Reynolds, R. C. Rose, Robust text-independent speaker identification using gaussian mixture speaker models. IEEE Trans. Speech Audio Process.3(1), 72\u201383 (1995). \n                    https:\/\/ieeexplore.ieee.org\/document\/365379\n                    \n                  .","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"151_CR8","unstructured":"A. Ross, A. K. Jain, in 2004 12th EuropeanSignal Processing Conference. Multimodal biometrics: An overview, (2004), pp. 1221\u20131224. \n                    https:\/\/ieeexplore.ieee.org\/document\/7080214\n                    \n                  . Accessed 04 June 2019."},{"issue":"1","key":"151_CR9","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/TCSVT.2003.818349","volume":"14","author":"A. K. Jain","year":"2004","unstructured":"A. K. Jain, A. Ross, S. Prabhakar, An introduction to biometric recognition. IEEE Trans. Circ. Syst. Video Technol.14(1), 4\u201320 (2004). \n                    https:\/\/ieeexplore.ieee.org\/document\/1262027\n                    \n                  .","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"151_CR10","unstructured":"Z. Wu, S. Gao, E. S. Cling, H. Li, in 2014 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA). A study on replay attack and anti-spoofing for text-dependent speaker verification, (2014), pp. 1\u20135. \n                    https:\/\/ieeexplore.ieee.org\/abstract\/document\/7041636\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR11","unstructured":"M. J. Alam, P. Kenny, T. Stafylakis, in Sixteenth Annual Conference of the International Speech Communication Association,. Combining amplitude and phase-based features for speaker verification with short duration utterances, (2015). \n                    https:\/\/pdfs.semanticscholar.org\/6d22\/330884f74d593afa3a672de39598b5f6ac11.pdf\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR12","unstructured":"Y. Liu, Y. Tian, L. He, J. Liu, M. T. Johnson, in Sixteenth Annual Conference of the International Speech Communication Association. Simultaneous utilization of spectral magnitude and phase information to extract supervectors for speaker verification anti-spoofing, (2015). \n                    https:\/\/www.isca-speech.org\/archive\/interspeech_2015\/papers\/i15_2082.pdf\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR13","unstructured":"H. Sailor, M. Kamble, H. Patil, in Proc. Interspeech 2018. Auditory filterbank learning for temporal modulation features in replay spoof speech detection, (2018), pp. 666\u2013670. \n                    https:\/\/www.isca-speech.org\/archive\/Interspeech_2018\/pdfs\/1651.pdf\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR14","unstructured":"Z. Chen, W. Zhang, Z. Xie, X. Xu, D. Chen, in 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Recurrent neural networks for automatic replay spoofing attack detection, (2018), pp. 2052\u20132056. \n                    https:\/\/ieeexplore.ieee.org\/abstract\/document\/8462644\n                    \n                  . Accessed 04 June 2019."},{"issue":"760","key":"151_CR15","first-page":"950","volume":"8","author":"S. Jelil","year":"2018","unstructured":"S. Jelil, S. Kalita, S. M. Prasanna, R. Sinha, Exploration of compressed ilpr features for replay attack detection. Interspeech. 8(760), 950 (2018). \n                    https:\/\/www.isca-speech.org\/archive\/Interspeech_2018\/pdfs\/1297.pdf\n                    \n                  .","journal-title":"Interspeech"},{"key":"151_CR16","unstructured":"G. Suthokumar, V. Sethu, C. Wijenayake, E. Ambikairajah, in Proc. Interspeech 2018. Modulation dynamic features for the detection of replay attacks, (2018), pp. 691\u2013695. \n                    https:\/\/www.isca-speech.org\/archive\/Interspeech_2018\/pdfs\/1846.pdf\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR17","unstructured":"M. Witkowski, S. Kacprzak, P. Zelasko, K. Kowalczyk, J. Ga\u0142ka, in 18th Annual Conf. Int. Speech Communication Association (INTERSPEECH), Stockholm, Sweden. Audio replay attack detection using high-frequency features, (2017), pp. 27\u201331. \n                    https:\/\/pdfs.semanticscholar.org\/a2b4\/c396dc1064fb90bb5455525733733c761a7f.pdf\n                    \n                  ."},{"key":"151_CR18","doi-asserted-by":"crossref","unstructured":"H. A. Patil, M. R. Kamble, T. B. Patel, M. H. Soni, in Proc. INTERSPEECH. Novel variable length teager energy separation based instantaneous frequency features for replay detection, (2017), pp. 12\u201316. \n                    https:\/\/www.isca-speech.org\/archive\/Interspeech_2017\/abstracts\/1362.html\n                    \n                  .","DOI":"10.21437\/Interspeech.2017-1362"},{"key":"151_CR19","doi-asserted-by":"crossref","unstructured":"R. Font, J. Espn, M. J. Cano, in Proc. INTERSPEECH. Experimental analysis of features for replay attack detection results on the asvspoof 2017 challenge, (2017), pp. 7\u201311. \n                    https:\/\/www.isca-speech.org\/archive\/Interspeech_2017\/abstracts\/0450.html\n                    \n                  .","DOI":"10.21437\/Interspeech.2017-450"},{"key":"151_CR20","doi-asserted-by":"crossref","unstructured":"S. Jelil, R. K. Das, S. M. Prasanna, R. Sinha, in Proc. INTERSPEECH. Spoof detection using source, instantaneous frequency and cepstral features, (2017), pp. 22\u201326. \n                    https:\/\/www.isca-speech.org\/archive\/Interspeech_2017\/abstracts\/0930.html\n                    \n                  .","DOI":"10.21437\/Interspeech.2017-930"},{"key":"151_CR21","unstructured":"Z. Oo, Y. Kawakami, L. Wang, S. Nakagawa, X. Xiao, M. Iwahashi, in Proc. INTERSPEECH. DNN-based amplitude and phase feature enhancement for noise robust speaker identification, (2016), pp. 2204\u20132208. \n                    https:\/\/www.isca-speech.org\/archive\/Interspeech_2016\/abstracts\/0717.html\n                    \n                  . Accessed 04 June 2019."},{"issue":"14","key":"151_CR22","doi-asserted-by":"publisher","first-page":"18865","DOI":"10.1007\/s11042-018-5686-1","volume":"77","author":"Z. Oo","year":"2018","unstructured":"Z. Oo, L. Wang, K. Phapatanaburi, M. Iwahashi, S. Nakagawa, J. Dang, Phase and reverberation aware dnn for distant-talking speech enhancement. Multimed. Tools Appl.77(14), 18865\u201318880 (2018). \n                    https:\/\/link.springer.com\/article\/10.1007\/s11042-018-5686-1\n                    \n                  .","journal-title":"Multimed. Tools Appl."},{"issue":"4","key":"151_CR23","doi-asserted-by":"publisher","first-page":"810","DOI":"10.1109\/TIFS.2015.2398812","volume":"10","author":"J. Sanchez","year":"2015","unstructured":"J. Sanchez, I. Saratxaga, I. Hernaez, E. Navas, D. Erro, T. Raitio, Toward a universal synthetic speech spoofing detection using phase information. IEEE Trans. Inf. Forensic Secur.10(4), 810\u2013820 (2015). \n                    https:\/\/ieeexplore.ieee.org\/stamp\/stamp.jsp?arnumber=7029029\n                    \n                  .","journal-title":"IEEE Trans. Inf. Forensic Secur."},{"key":"151_CR24","unstructured":"Z. Wu, E. S. Chng, H. Li, in Thirteenth Annual Conference of the International Speech Communication Association. Detecting converted speech and natural speech for anti-spoofing attack in speaker recognition, (2012). \n                    https:\/\/pdfs.semanticscholar.org\/617d\/f2f1be497d98c0e255d66eb690af5a97b259.pdf\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR25","unstructured":"F. Itakura, T. Umezaki, in 1987 IEEE International Conference On Acoustics, Speech and Signal Processing (ICASSP). Distance measure for speech recognition based on the smoothed group delay spectrum, (1987), pp. 1257\u20131260. \n                    https:\/\/ieeexplore.ieee.org\/abstract\/document\/1169476\n                    \n                  ."},{"key":"151_CR26","unstructured":"R. M. Hegde, H. A. Murthy, G. R. Rao, in 2004 IEEE International Conference On Acoustics, Speech, and Signal Processing (ICASSP), vol. 1. Application of the modified group delay function to speaker identification and discrimination, (2004), p. 517. \n                    https:\/\/ieeexplore.ieee.org\/document\/1326036\n                    \n                  ."},{"key":"151_CR27","unstructured":"L. Wang, K. Minami, K. Yamamoto, S. Nakagawa, in 2010 IEEE International Conference On Acoustics Speech and Signal Processing (ICASSP). Speaker identification by combining mfcc and phase information in noisy environments, (2010), pp. 4502\u20134505. \n                    https:\/\/ieeexplore.ieee.org\/document\/5495586\n                    \n                  . Accessed 04 June 2019."},{"issue":"4","key":"151_CR28","doi-asserted-by":"publisher","first-page":"1085","DOI":"10.1109\/TASL.2011.2172422","volume":"20","author":"S. Nakagawa","year":"2012","unstructured":"S. Nakagawa, L. Wang, S. Ohtsuka, Speaker identification and verification by combining mfcc and phase information. IEEE Trans. Audio Speech Lang. Process.20(4), 1085\u20131095 (2012). \n                    https:\/\/ieeexplore.ieee.org\/document\/6047571\n                    \n                  .","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"151_CR29","unstructured":"L. Wang, Y. Yoshida, Y. Kawakami, S. Nakagawa, in Sixteenth Annual Conference of the International Speech Communication Association. Relative phase information for detecting human speech and spoofed speech, (2015). \n                    http:\/\/www.asvspoof.org\/asvspoof2015\/longbiao.pdf\n                    \n                  . Accessed 04 June 2019."},{"issue":"4","key":"151_CR30","doi-asserted-by":"publisher","first-page":"660","DOI":"10.1109\/JSTSP.2017.2694139","volume":"11","author":"L. Wang","year":"2017","unstructured":"L. Wang, S. Nakagawa, Z. Zhang, Y. Yoshida, Y. Kawakami, Spoofing speech detection using modified relative phase information. IEEE J. Sel. Top. Sign. Process.11(4), 660\u2013670 (2017). \n                    http:\/\/www.slp.cs.tut.ac.jp\/nakagawa\/pdfs\/wang.ieee.2017.pdf\n                    \n                  .","journal-title":"IEEE J. Sel. Top. Sign. Process."},{"key":"151_CR31","unstructured":"D. Li, L. Wang, J. Dang, M. Liu, Z. Oo, S. Nakagawa, H. Guan, X. Li, in Proc. Interspeech. Multiple phase information combination for replay attacks detection, (2018), pp. 656\u2013660. \n                    https:\/\/www.isca-speech.org\/archive\/Interspeech_2018\/pdfs\/2001.pdf\n                    \n                  . Accessed 04 June 2019."},{"issue":"6","key":"151_CR32","first-page":"2428","volume":"2","author":"G. S. Kumar","year":"2010","unstructured":"G. S. Kumar, K. P. Raju, M. R. CPVNJ, P. Satheesh, Speaker recognition using gmm. Int. J. Eng. Sci. Technol.2(6), 2428\u20132436 (2010). \n                    https:\/\/pdfs.semanticscholar.org\/3593\/eba26ee4aac7dca4f4bd75f79cdce46b4894.pdf\n                    \n                  .","journal-title":"Int. J. Eng. Sci. Technol."},{"key":"151_CR33","unstructured":"T. Kinnunen, M. Sahidullah, M. Falcone, L. Costantini, R. G. Hautam\u00e4ki, D. Thomsen, A. Sarkar, Z. -H. Tan, H. Delgado, M. Todisco, in 2017 IEEE International Conference On Acoustics, Speech and Signal Processing (ICASSP). Reddots replayed: A new replay spoofing attack corpus for text-dependent speaker verification research, (2017), pp. 5395\u20135399. \n                    https:\/\/ieeexplore.ieee.org\/document\/7953187\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR34","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1016\/j.specom.2016.10.002","volume":"85","author":"C. Hanil\u00e7i","year":"2016","unstructured":"C. Hanil\u00e7i, T. Kinnunen, M. Sahidullah, A. Sizov, Spoofing detection goes noisy: An analysis of synthetic speech detection in the presence of additive noise. Speech Comm.85:, 83\u201397 (2016). \n                    https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0167639316300681\n                    \n                  .","journal-title":"Speech Comm."},{"issue":"1","key":"151_CR35","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TASL.2006.876858","volume":"15","author":"R. M. Hegde","year":"2007","unstructured":"R. M. Hegde, H. A. Murthy, V. R. R. Gadde, Significance of the modified group delay feature in speech recognition. IEEE Trans. Audio Speech Lang. Process.15(1), 190\u2013202 (2007). \n                    https:\/\/ieeexplore.ieee.org\/document\/4032772\n                    \n                  .","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"151_CR36","unstructured":"R. Padmanabhan, S. H. Parthasarathi, H. A. Murthy, in Proc. INTERSPEECH. Robustness of phase based features for speaker recognition, (2009), pp. 2535\u20132358. \n                    https:\/\/www.isca-speech.org\/archive\/archive_papers\/interspeech_2009\/papers\/i09_2355.pdf\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR37","unstructured":"J. Kua, J. Eppsi, E. Ambikairajah, E. Choi, in Proc. INTERSPEECH. LS Regularization of Group Delay Features for Speaker Recognition, (2009), pp. 2887\u20132890. \n                    https:\/\/www.isca-speech.org\/archive\/archive_papers\/interspeech_2009\/papers\/i09_2887.pdf\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR38","unstructured":"Y. Shao, Z. Jin, D. Wang, S. Srinivasan, in 2009 IEEE International Conference On Acoustics, Speech and Signal Processing (ICASSP). An auditory-based feature for robust speech recognition, (2009), pp. 4625\u20134628. \n                    https:\/\/ieeexplore.ieee.org\/document\/4960661\n                    \n                  . Accessed 04 June 2019."},{"key":"151_CR39","unstructured":"X. Zhao, D. Wang, in 2013 IEEE International Conference On Acoustics, Speech and Signal Processing (ICASSP). Analyzing noise robustness of mfcc and gfcc features in speaker identification, (2013), pp. 7204\u20137208. \n                    https:\/\/ieeexplore.ieee.org\/document\/6639061\n                    \n                  . Accessed 04 June 2019."}],"container-title":["EURASIP Journal on Audio, Speech, and Music Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-019-0151-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13636-019-0151-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-019-0151-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,6,8]],"date-time":"2020-06-08T23:09:42Z","timestamp":1591657782000},"score":1,"resource":{"primary":{"URL":"https:\/\/asmp-eurasipjournals.springeropen.com\/articles\/10.1186\/s13636-019-0151-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,6,10]]},"references-count":39,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,12]]}},"alternative-id":["151"],"URL":"https:\/\/doi.org\/10.1186\/s13636-019-0151-2","relation":{},"ISSN":["1687-4722"],"issn-type":[{"value":"1687-4722","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,6,10]]},"assertion":[{"value":"5 February 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 May 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 June 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare that they have no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"8"}}