{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:28:15Z","timestamp":1775230095352,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":25,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819783663","type":"print"},{"value":"9789819783670","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8367-0_16","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T11:56:46Z","timestamp":1732795006000},"page":"259-273","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Distinguishing Neural Speech Synthesis Models Through Fingerprints in\u00a0Speech Waveforms"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-3461-3034","authenticated-orcid":false,"given":"Chu Yuan","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2422-4618","authenticated-orcid":false,"given":"Jiangyan","family":"Yi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9344-6428","authenticated-orcid":false,"given":"Jianhua","family":"Tao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5785-7027","authenticated-orcid":false,"given":"Chenglong","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6499-0272","authenticated-orcid":false,"given":"Xinrui","family":"Yan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"key":"16_CR1","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. In: International Conference on Learning Representations (2019)"},{"issue":"2","key":"16_CR2","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1109\/TPAMI.2019.2938758","volume":"43","author":"SH Gao","year":"2021","unstructured":"Gao, S.H., Cheng, M.M., Zhao, K., Zhang, X.Y., Yang, M.H., Torr, P.: Res2Net: a new multi-scale backbone architecture. IEEE Trans. Pattern Anal. Mach. Intell. 43(2), 652\u2013662 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR3","unstructured":"Kim, J., Kong, J., Son, J.: Conditional variational autoencoder with adversarial learning for end-to-end text-to-speech. In: Proceedings of the 38th International Conference on Machine Learning, pp. 5530\u20135540. PMLR (2021)"},{"key":"16_CR4","doi-asserted-by":"publisher","unstructured":"Kinnunen, T., et al.: The ASVspoof 2017 challenge: assessing the limits of replay spoofing attack detection. In: Interspeech 2017, pp. 2\u20136. ISCA (2017). https:\/\/doi.org\/10.21437\/Interspeech.2017-1111","DOI":"10.21437\/Interspeech.2017-1111"},{"key":"16_CR5","unstructured":"Kong, J., Kim, J., Bae, J.: HiFi-GAN: generative adversarial networks for efficient and high fidelity speech synthesis. In: Advances in Neural Information Processing Systems, vol.\u00a033, pp. 17022\u201317033 (2020)"},{"key":"16_CR6","unstructured":"Lu, J., Zhang, Y., Li, Z., Shang, Z., Wang, W., Zhang, P.: Detecting unknown speech spoofing algorithms with nearest neighbors. In: Proceedings of IJCAI 2023 Workshop on Deepfake Audio Detection and Analysis (2023)"},{"key":"16_CR7","doi-asserted-by":"publisher","unstructured":"Marra, F., Gragnaniello, D., Verdoliva, L., Poggi, G.: Do GANs leave artificial fingerprints? In: 2019 IEEE Conference on Multimedia Information Processing and Retrieval (MIPR), pp. 506\u2013511. IEEE Computer Society (2019). https:\/\/doi.org\/10.1109\/MIPR.2019.00103","DOI":"10.1109\/MIPR.2019.00103"},{"key":"16_CR8","doi-asserted-by":"publisher","unstructured":"Mustafa, A., Pia, N., Fuchs, G.: StyleMelGAN: an efficient high-fidelity adversarial vocoder with temporal adaptive normalization. In: 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6034\u20136038 (2021). https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9413605","DOI":"10.1109\/ICASSP39728.2021.9413605"},{"key":"16_CR9","doi-asserted-by":"publisher","unstructured":"Pons, J., Pascual, S., Cengarle, G., Serr\u00e0, J.: Upsampling artifacts in neural audio synthesis. In: 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3005\u20133009 (2021). https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9414913","DOI":"10.1109\/ICASSP39728.2021.9414913"},{"key":"16_CR10","unstructured":"Popov, V., Vovk, I., Gogoryan, V., Sadekova, T., Kudinov, M.: Grad-TTS: a diffusion probabilistic model for text-to-speech. In: Proceedings of the 38th International Conference on Machine Learning, pp. 8599\u20138608. PMLR (2021)"},{"key":"16_CR11","unstructured":"Ren, Y., et al.: FastSpeech 2: fast and high-quality end-to-end text to speech. In: International Conference on Learning Representations (ICLR) (2021)"},{"key":"16_CR12","doi-asserted-by":"publisher","unstructured":"Shen, J., et al.: Natural TTS synthesis by conditioning Wavenet on Mel spectrogram predictions. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4779\u20134783 (2018). https:\/\/doi.org\/10.1109\/ICASSP.2018.8461368","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"16_CR13","unstructured":"Tian, Y., Chen, Y., Tang, Y., Fu, B.: Deepfake algorithm recognition through multi-model fusion based on manifold measure. In: Proceedings of IJCAI 2023 Workshop on Deepfake Audio Detection and Analysis (2023)"},{"key":"16_CR14","doi-asserted-by":"crossref","unstructured":"Todisco, M., et al.: Integrated presentation attack detection and automatic speaker verification: common features and gaussian back-end fusion. In: Proceedings Interspeech 2018, pp. 77\u201381 (2018)","DOI":"10.21437\/Interspeech.2018-2289"},{"issue":"86","key":"16_CR15","first-page":"2579","volume":"9","author":"L van der Maaten","year":"2008","unstructured":"van der Maaten, L., Hinton, G.: Visualizing data using t-SNE. J. Mach. Learn. Res. 9(86), 2579\u20132605 (2008)","journal-title":"J. Mach. Learn. Res."},{"key":"16_CR16","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/j.csl.2020.101114","volume":"64","author":"X Wang","year":"2020","unstructured":"Wang, X., et al.: ASVspoof 2019: a large-scale public database of synthesized, converted and replayed speech. Comput. Speech Lang. 64, 101\u2013114 (2020). https:\/\/doi.org\/10.1016\/j.csl.2020.101114","journal-title":"Comput. Speech Lang."},{"key":"16_CR17","doi-asserted-by":"publisher","unstructured":"Wu, Z., et al.: ASVspoof 2015: the first automatic speaker verification spoofing and countermeasures challenge. In: Interspeech 2015, pp. 2037\u20132041. ISCA (2015). https:\/\/doi.org\/10.21437\/Interspeech.2015-462","DOI":"10.21437\/Interspeech.2015-462"},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"Yamagishi, J., et\u00a0al.: ASVspoof 2021: accelerating progress in spoofed and deepfake speech detection. arXiv preprint arXiv:2109.00537 (2021)","DOI":"10.21437\/ASVSPOOF.2021-8"},{"key":"16_CR19","doi-asserted-by":"publisher","unstructured":"Yamamoto, R., Song, E., Kim, J.M.: Parallel WaveGAN: a fast waveform generation model based on generative adversarial networks with multi-resolution spectrogram. In: 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6199\u20136203 (2020). https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9053795","DOI":"10.1109\/ICASSP40776.2020.9053795"},{"key":"16_CR20","doi-asserted-by":"publisher","unstructured":"Yan, X., et al.: An initial investigation for detecting vocoder fingerprints of fake audio. In: Proceedings of the 1st International Workshop on Deepfake Detection for Audio Multimedia (DDAM 2022), pp. 61\u201368 (2022). https:\/\/doi.org\/10.1145\/3552466.3556525","DOI":"10.1145\/3552466.3556525"},{"key":"16_CR21","doi-asserted-by":"crossref","unstructured":"Yang, G., Yang, S., Liu, K., Fang, P., Chen, W., Xie, L.: Multi-band MelGAN: faster waveform generation for high-quality text-to-speech. arXiv:2005.05106 [cs, eess] (2020)","DOI":"10.1109\/SLT48900.2021.9383551"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Yi, J., et\u00a0al.: ADD 2022: the first audio deep synthesis detection challenge. In: 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 9216\u20139220. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9746939"},{"key":"16_CR23","unstructured":"Yi, J., et al.: ADD 2023: the second audio deepfake detection challenge. In: Proceedings of IJCAI 2023 Workshop on Deepfake Audio Detection and Analysis (2023)"},{"key":"16_CR24","unstructured":"Yi, J., Wang, C., Tao, J., Zhang, X., Zhang, C.Y., Zhao, Y.: Audio deepfake detection: a survey. arXiv 2308.14970 (2023)"},{"key":"16_CR25","doi-asserted-by":"publisher","unstructured":"Zen, H., et al.: LibriTTS: a corpus derived from LibriSpeech for text-to-speech. In: Proceedings Interspeech 2019, pp. 1526\u20131530 (2019). https:\/\/doi.org\/10.21437\/Interspeech.2019-2441","DOI":"10.21437\/Interspeech.2019-2441"}],"container-title":["Lecture Notes in Computer Science","Chinese Computational Linguistics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8367-0_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T12:07:19Z","timestamp":1732795639000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8367-0_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9789819783663","9789819783670"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8367-0_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors declare that they have no conflict of interest.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"CCL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China National Conference on Chinese Computational Linguistics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiyuan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cncl2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/cips-cl.org\/static\/CCL2024\/en\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}