{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T23:41:15Z","timestamp":1743032475058,"version":"3.40.3"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031138287"},{"type":"electronic","value":"9783031138294"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-13829-4_49","type":"book-chapter","created":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T09:03:13Z","timestamp":1660467793000},"page":"569-578","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Arbitrary Voice Conversion via Adversarial Learning and Cycle Consistency Loss"],"prefix":"10.1007","author":[{"given":"Jie","family":"Lian","sequence":"first","affiliation":[]},{"given":"Pingyuan","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Yuxing","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Guilin","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,15]]},"reference":[{"key":"49_CR1","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1007\/978-0-387-77592-0","volume-title":"Speaker recognition. Fundamentals of Speaker Recognition","author":"H Beigi","year":"2011","unstructured":"Beigi, H.: Speaker recognition. Fundamentals of Speaker Recognition, pp. 
543\u2013559. Springer, Boston (2011)"},{"key":"49_CR2","doi-asserted-by":"crossref","unstructured":"Robinson, C., Obin, N., Roebel, A.: Sequence-to-sequence modelling of f0 for speech emotion conversion. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6830\u20136834. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8683865"},{"key":"49_CR3","doi-asserted-by":"crossref","unstructured":"Wang, Z., Ge, W., Wang, X., et al.: Accent and speaker disentanglement in many-to many voice conversion. In: 2021 12th International Symposium on Chinese Spoken Language Processing (ISCSLP), pp. 1\u20135. IEEE (2021)","DOI":"10.1109\/ISCSLP49672.2021.9362120"},{"key":"49_CR4","doi-asserted-by":"crossref","unstructured":"Galescu, L., Allen, J.F.: Pronunciation of proper names with a joint n-gram model for bi-directional grapheme-to-phoneme conversion. In: Seventh International Conference on Spoken Language Processing (2002)","DOI":"10.21437\/ICSLP.2002-79"},{"key":"49_CR5","doi-asserted-by":"crossref","unstructured":"Kain, A., Macon, M.W.: Spectral voice conversion for text-to-speech synthesis. In: Proceedings of the 1998 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP\u201998 (Cat. No. 98CH36181). 1, pp. 285\u2013288. IEEE (1998)","DOI":"10.1109\/ICASSP.1998.674423"},{"key":"49_CR6","doi-asserted-by":"crossref","unstructured":"Zhang, M., Wang, X., Fang, F., et al.: Joint training framework for text-to-speech and voice conversion using multi-source tacotron and wavenet. arXiv preprint arXiv:1903.12389 (2019)","DOI":"10.21437\/Interspeech.2019-1357"},{"key":"49_CR7","doi-asserted-by":"crossref","unstructured":"Subramani, N., Rao, D.: Learning efficient representations for fake speech detection. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34(04), pp. 
5859\u20135866 (2020)","DOI":"10.1609\/aaai.v34i04.6044"},{"key":"49_CR8","doi-asserted-by":"crossref","unstructured":"Wu, Z., Li, H.: Voice conversion versus speaker verification: an overview. APSIPA Trans. Sig. Inf. Process. 3 (2014)","DOI":"10.1017\/ATSIP.2014.17"},{"key":"49_CR9","doi-asserted-by":"crossref","unstructured":"Huang, C., Lin, Y.Y., Lee, H., et al.: Defending your voice: adversarial attack on voice conversion. In: 2021 IEEE Spoken Language Technology Workshop (SLT), pp. 552\u2013559. IEEE (2021)","DOI":"10.1109\/SLT48900.2021.9383529"},{"issue":"2","key":"49_CR10","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1250\/ast.11.71","volume":"11","author":"M Abe","year":"1990","unstructured":"Abe, M., Nakamura, S., Shikano, K., et al.: Voice conversion through vector quantization. J. Acoust. Soc. Japan (E) 11(2), 71\u201376 (1990)","journal-title":"J. Acoust. Soc. Japan (E)"},{"key":"49_CR11","doi-asserted-by":"crossref","unstructured":"Shikano, K., Nakamura, S., Abe, M.: Speaker adaptation and voice conversion by codebook mapping. In: 1991 IEEE International Symposium on Circuits and Systems (ISCAS), pp. 594\u2013597. IEEE (1991)","DOI":"10.1109\/ISCAS.1991.176405"},{"key":"49_CR12","doi-asserted-by":"crossref","unstructured":"Helander, E., Schwarz, J., Nurminen, J., et al.: On the impact of alignment on voice conversion performance. In: Ninth Annual Conference of the International Speech Communication Association (2008)","DOI":"10.21437\/Interspeech.2008-419"},{"issue":"2\u20133","key":"49_CR13","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1016\/0167-6393(92)90012-V","volume":"11","author":"H Valbret","year":"1992","unstructured":"Valbret, H., Moulines, E., Tubach, J.P.: Voice transformation using PSOLA technique. Speech Commun. 
11(2\u20133), 175\u2013187 (1992)","journal-title":"Speech Commun."},{"key":"49_CR14","doi-asserted-by":"crossref","unstructured":"Chen, Y., Chu, M., Chang, E., et al.: Voice conversion with smoothed GMM and MAP adaptation. In: Eighth European Conference on Speech Communication and Technology (2003)","DOI":"10.21437\/Eurospeech.2003-664"},{"key":"49_CR15","doi-asserted-by":"crossref","unstructured":"Mashimo, M., Toda, T., Shikano, K., et al.: Evaluation of cross-language voice conversion based on GMM and STRAIGHT (2001)","DOI":"10.21437\/Eurospeech.2001-111"},{"key":"49_CR16","doi-asserted-by":"crossref","unstructured":"Toda, T., Lu, J., Saruwatari, H., et al.: STRAIGHT-based voice conversion algorithm based on Gaussian mixture model (2000)","DOI":"10.21437\/ICSLP.2000-532"},{"issue":"5","key":"49_CR17","doi-asserted-by":"publisher","first-page":"912","DOI":"10.1109\/TASL.2010.2041699","volume":"18","author":"E Helander","year":"2010","unstructured":"Helander, E., Virtanen, T., Nurminen, J., et al.: Voice conversion using partial least squares regression. IEEE Trans. Audio Speech Lang. Process. 18(5), 912\u2013921 (2010)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"49_CR18","doi-asserted-by":"crossref","unstructured":"Helander, E., Sil\u00e9n, H., Virtanen, T., et al.: Voice conversion using dynamic kernel partial least squares regression. IEEE Trans. Audio Speech Lang. Process. 20(3), 806\u2013817 (2011)","DOI":"10.1109\/TASL.2011.2165944"},{"key":"49_CR19","doi-asserted-by":"crossref","unstructured":"Weninger, F., Watanabe, S., Tachioka, Y., et al.: Deep recurrent de-noising autoencoder and blind de-reverberation for reverberated speech recognition. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4623\u20134627. 
IEEE (2014)","DOI":"10.1109\/ICASSP.2014.6854478"},{"key":"49_CR20","doi-asserted-by":"publisher","first-page":"19143","DOI":"10.1109\/ACCESS.2019.2896880","volume":"7","author":"AB Nassif","year":"2019","unstructured":"Nassif, A.B., Shahin, I., Attili, I., et al.: Speech recognition using deep neural networks: a systematic review. IEEE access 7, 19143\u201319165 (2019)","journal-title":"IEEE access"},{"key":"49_CR21","unstructured":"Afouras, T., Chung, J.S., Senior, A., et al.: Deep audio-visual speech recognition. IEEE Trans. Pattern Anal. Mach. Intell. (2018)"},{"issue":"10","key":"49_CR22","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid, O., Mohamed, A., Jiang, H., et al.: Convolutional neural networks for speech recognition. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(10), 1533\u20131545 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"49_CR23","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"49_CR24","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., et al.: Generative adversarial nets. In: Advances in Neural Information Processing Systems, 27 (2014)"},{"key":"49_CR25","doi-asserted-by":"crossref","unstructured":"Kameoka, H., Kaneko, T., Tanaka, K., et al.: ACVAE-VC: non-parallel many-to-many voice conversion with auxiliary classifier variational autoencoder. arXiv preprint arXiv:1808.05092 (2018)","DOI":"10.1109\/TASLP.2019.2917232"},{"key":"49_CR26","doi-asserted-by":"crossref","unstructured":"Saito, Y., Ijima, Y., Nishida, K., et al.: Non-parallel voice conversion using variational autoencoders conditioned by phonetic posteriorgrams and d-vectors. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5274\u20135278. 
IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8461384"},{"key":"49_CR27","doi-asserted-by":"crossref","unstructured":"Hsu, C.C., Hwang, H.T., Wu, Y.C., et al.: Voice conversion from non-parallel corpora using variational auto-encoder. In: 2016 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA), pp. 1\u20136. IEEE (2016)","DOI":"10.1109\/APSIPA.2016.7820786"},{"key":"49_CR28","doi-asserted-by":"crossref","unstructured":"Wang, R., Ding, Y., Li, L., et al.: One-shot voice conversion using starGAN. In: ICASSP 2020\u20132020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7729\u20137733. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053842"},{"key":"49_CR29","doi-asserted-by":"crossref","unstructured":"Kaneko, T., Kameoka, H.: Cyclegan-vc: non-parallel voice conversion using cycle consistent adversarial networks. In: 2018 26th European Signal Processing Conference (EUSIPCO), pp. 2100\u20132104. IEEE (2018)","DOI":"10.23919\/EUSIPCO.2018.8553236"},{"key":"49_CR30","doi-asserted-by":"crossref","unstructured":"Kaneko, T., Kameoka, H., Tanaka, K., et al.: Cyclegan-vc2: Improved cyclegan-based non-parallel voice conversion. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6820\u20136824. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8682897"},{"key":"49_CR31","doi-asserted-by":"crossref","unstructured":"Kameoka, H., Kaneko, T., Tanaka, K., et al.: Stargan-vc: non-parallel many-to-many voice conversion using star generative adversarial networks. In: 2018 IEEE Spoken Language Technology Workshop (SLT), pp. 266\u2013273. IEEE (2018)","DOI":"10.1109\/SLT.2018.8639535"},{"key":"49_CR32","unstructured":"Maas A L, Hannun A Y, Ng A Y. Rectifier nonlinearities improve neural network acoustic models[C]\/\/Proc. icml. 
2013, 30(1): 3"},{"key":"49_CR33","unstructured":"Ulyanov, D., Vedaldi, A., Lempitsky, V.: Instance normalization: the missing ingredient for fast stylization. arXiv preprint arXiv:1607.08022 (2016)"},{"key":"49_CR34","unstructured":"Qian, K., Zhang, Y., Chang, S., et al.: Autovc: zero-shot voice style transfer with only autoencoder loss. In: International Conference on Machine Learning, PMLR, pp. 5210\u20135219 (2019)"},{"key":"49_CR35","doi-asserted-by":"crossref","unstructured":"Wu, D.Y., Chen, Y.H., Lee, H.Y.: Vqvc+: one-shot voice conversion by vector quantization and u-net architecture. arXiv preprint arXiv:2006.04154 (2020)","DOI":"10.21437\/Interspeech.2020-1443"},{"key":"49_CR36","doi-asserted-by":"crossref","unstructured":"Chen, Y.H., Wu, D.Y., Wu, T.H., et al.: Again-vc: a one-shot voice conversion using activation guidance and adaptive instance normalization. In: ICASSP 2021- 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5954\u20135958. 
IEEE (2021)","DOI":"10.1109\/ICASSP39728.2021.9414257"}],"container-title":["Lecture Notes in Computer Science","Intelligent Computing Theories and Application"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-13829-4_49","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T18:07:36Z","timestamp":1727806056000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-13829-4_49"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031138287","9783031138294"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-13829-4_49","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"15 August 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xi'an","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 August 2022","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 August 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/2022\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Open","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"IC-ICC-CN","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"449","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"209","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"47% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole 
number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}