{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,30]],"date-time":"2025-11-30T04:09:47Z","timestamp":1764475787985,"version":"3.46.0"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T00:00:00Z","timestamp":1753747200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T00:00:00Z","timestamp":1753747200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s00034-025-03238-y","type":"journal-article","created":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T18:35:13Z","timestamp":1753814113000},"page":"9415-9445","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["TStarGANv2-VC: Non-parallel Multi-domain Transformer Based StarGANv2 Voice Conversion"],"prefix":"10.1007","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1549-8561","authenticated-orcid":false,"given":"Anandhakumar","family":"Dharmalingam","sequence":"first","affiliation":[]},{"given":"Venkata Krishna Kishore","family":"Kolli","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,29]]},"reference":[{"key":"3238_CR1","doi-asserted-by":"publisher","unstructured":"M.T. Akhter, P. Banerjee, S. Dhar, N.D. Jana, An analysis of performance evaluation metrics for voice conversion models. Proc. IEEE INDICON, pp. 1\u20136 (2022). https:\/\/doi.org\/10.1109\/INDICON56171.2022.10040000","DOI":"10.1109\/INDICON56171.2022.10040000"},{"key":"3238_CR2","unstructured":"K. Akuzawa, K. Onishi, K. Takiguchi, K. Mametani, K. Mori, Conditional deep hierarchical variational autoencoder for voice conversion. In APSIPA ASC, pp. 808\u2013813 (2021)."},{"key":"3238_CR3","doi-asserted-by":"publisher","unstructured":"Y. Alaa, M. Alfonse, M.M. Aref, A Survey on generative adversarial networks based models for many-to-many non-parallel voice conversion. In IEEE ICCI, pp. 221\u2013226 (2022). https:\/\/doi.org\/10.1109\/ICCI54321.2022.9756059","DOI":"10.1109\/ICCI54321.2022.9756059"},{"key":"3238_CR4","first-page":"100192","volume":"11","author":"M Amin","year":"2025","unstructured":"M. Amin, K.M. Nahar, H. Gharaibeh, A. Nasayreh, N.A. Alsalman, A. Alomar, L. Abualigah, DieT transformer with PCA-ADE integration for multiclass brain tumor classification. Intell-Based Med 11, 100192 (2025)","journal-title":"Intell-Based Med"},{"key":"3238_CR5","doi-asserted-by":"publisher","first-page":"1339159","DOI":"10.3389\/frsip.2024.1339159","volume":"4","author":"AR Bargum","year":"2024","unstructured":"A.R. Bargum, S. Serafin, C. Erkut, Reimagining speech: a scoping review of deep learning-based methods for non-parallel voice conversion. Front. Signal Process. 4, 1339159 (2024). https:\/\/doi.org\/10.3389\/frsip.2024.1339159","journal-title":"Front. Signal Process."},{"key":"3238_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.cma.2024.117588","volume":"434","author":"S Biswas","year":"2025","unstructured":"S. Biswas, G. Singh, B. Maiti, A.E.S. Ezugwu, K. Saleem, A. Smerat, U.K. Bera, Integrating Differential Evolution into Gazelle Optimization. Comput. Methods Appl. Mech. Eng. 434, 117588 (2025)","journal-title":"Comput. Methods Appl. Mech. Eng."},{"key":"3238_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.107807","volume":"131","author":"L Chen","year":"2024","unstructured":"L. Chen, X. Zhang, Y. Li, M. Sun, Noise-robust voice conversion using adversarial training with multi-feature decoupling. Eng. Appl. Artif. Intell. 131, 107807 (2024). https:\/\/doi.org\/10.1016\/j.engappai.2023.107807","journal-title":"Eng. Appl. Artif. Intell."},{"key":"3238_CR8","doi-asserted-by":"publisher","unstructured":"H.Y. Choi, S.H. Lee, S.W. Lee, Diff-HierVC: Diffusion-based hierarchical voice conversion with robust pitch generation and masked prior for zero-shot speaker adaptation. Int. Speech Commun Assoc, pp. 2283\u20132287 (2023). https:\/\/doi.org\/10.48550\/arXiv.2311.04693","DOI":"10.48550\/arXiv.2311.04693"},{"issue":"5","key":"3238_CR9","doi-asserted-by":"publisher","first-page":"2489","DOI":"10.1109\/jbhi.2023.3239551","volume":"27","author":"M Chu","year":"2023","unstructured":"M. Chu, M. Yang, C. Xu, Y. Ma, J. Wang, Z. Fan, Z. Tao, D. Wu, E-DGAN: an encoder-decoder GAN based method for pathological to normal voice conversion. IEEE J. Biomed. Health Inform. 27(5), 2489\u20132500 (2023). https:\/\/doi.org\/10.1109\/jbhi.2023.3239551","journal-title":"IEEE J. Biomed. Health Inform."},{"issue":"1","key":"3238_CR10","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1109\/TAI.2022.3149858","volume":"4","author":"S Dhar","year":"2022","unstructured":"S. Dhar, N.D. Jana, S. Das, An adaptive-learning-based generative adversarial network for one-to-one voice conversion. IEEE Trans. Artif. Intell. 4(1), 92\u2013106 (2022). https:\/\/doi.org\/10.1109\/TAI.2022.3149858","journal-title":"IEEE Trans. Artif. Intell."},{"key":"3238_CR11","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1016\/j.neunet.2022.01.003","volume":"148","author":"H Du","year":"2022","unstructured":"H. Du, L. Xie, H. Li, Noise-robust voice conversion with domain adversarial training. Neural Netw. 148, 74\u201384 (2022). https:\/\/doi.org\/10.1016\/j.neunet.2022.01.003","journal-title":"Neural Netw."},{"issue":"21","key":"3238_CR12","doi-asserted-by":"publisher","first-page":"11988","DOI":"10.3390\/app132111988","volume":"13","author":"K Ezzine","year":"2023","unstructured":"K. Ezzine, J. Di Martino, M. Frikha, Any-to-one non-parallel voice conversion using autoregressive model and LPCNet vocoder. Appl. Sci. 13(21), 11988 (2023). https:\/\/doi.org\/10.3390\/app132111988","journal-title":"Appl. Sci."},{"key":"3238_CR13","doi-asserted-by":"publisher","unstructured":"A. Gabry\u015b, G. Huybrechts, M.S. Ribeiro, C.M. Chien, J. Roth, G. Comini, R. Barra-Chicote, B. Perz, J. Lorenzo-Trueba, Voice filter: few-shot text-to-speech speaker adaptation using voice conversion as a post-processing module. Proc. IEEE ICASSP, pp. 7902\u20137906 (2022). https:\/\/doi.org\/10.1109\/icassp43922.2022.9747239","DOI":"10.1109\/icassp43922.2022.9747239"},{"issue":"6","key":"3238_CR14","doi-asserted-by":"publisher","first-page":"1308","DOI":"10.1109\/JSTSP.2022.3193761C","volume":"16","author":"WC Huang","year":"2022","unstructured":"W.C. Huang, S.W. Yang, T. Hayashi, T. Toda, A Comparative study of self-supervised speech representation based voice conversion. IEEE J. Sel. Topics Signal Process. 16(6), 1308\u20131318 (2022). https:\/\/doi.org\/10.1109\/JSTSP.2022.3193761C","journal-title":"IEEE J. Sel. Topics Signal Process."},{"key":"3238_CR15","doi-asserted-by":"publisher","unstructured":"T. Jayashankar, J. Wu, L. Sari, D. Kant, V. Manohar, Q. He, Self-supervised representations for singing voice conversion. In IEEE Int. Conf. on Acoustics, Speech and Signal Processing, pp. 1\u20135 (2023). https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10097147","DOI":"10.1109\/ICASSP49357.2023.10097147"},{"issue":"10","key":"3238_CR16","doi-asserted-by":"publisher","first-page":"4251","DOI":"10.3390\/app14104251","volume":"14","author":"C Jeong","year":"2024","unstructured":"C. Jeong, H.P. Chang, I.C. Yoo, D. Yook, Wav2Wav: wave-to-wave voice conversion. Appl. Sci. 14(10), 4251 (2024). https:\/\/doi.org\/10.3390\/app14104251","journal-title":"Appl. Sci."},{"key":"3238_CR17","unstructured":"Y. Jia, M.T. Ramanovich, T. Remez, R. Pomerantz, Translatotron: High-quality direct speech-to-speech translation with voice preservation. In ICML, pp. 10120\u201310134 (2022)."},{"key":"3238_CR18","doi-asserted-by":"publisher","unstructured":"T. Kaneko, H. Kameoka, K. Tanaka, N. Hojo, Maskcyclegan-VC: learning non-parallel voice conversion with filling in frames. In IEEE ICASSP, pp. 5919\u20135923 (2021). https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9414851","DOI":"10.1109\/ICASSP39728.2021.9414851"},{"issue":"4","key":"3238_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11063-024-11613-0","volume":"56","author":"K Ko","year":"2024","unstructured":"K. Ko, D. Kim, K. Oh, H. Ko, WaveVC: speech and fundamental frequency consistent raw audio voice conversion. Neural. Process. Lett. 56(4), 1\u201313 (2024). https:\/\/doi.org\/10.1007\/s11063-024-11613-0","journal-title":"Neural. Process. Lett."},{"key":"3238_CR20","doi-asserted-by":"crossref","unstructured":"J.N. Kundu, A. Kulkarni, A. Singh, V. Jampani, R.V. Babu, Generalize then adapt: source-free domain adaptive semantic segmentation. In Proc. IEEE\/CVF Int. Conf. Comput. Vision, pp. 7046\u20137056 (2021).","DOI":"10.1109\/ICCV48922.2021.00696"},{"issue":"11","key":"3238_CR21","doi-asserted-by":"publisher","first-page":"1724","DOI":"10.3390\/electronics11111724","volume":"11","author":"WH Lai","year":"2022","unstructured":"W.H. Lai, S.L. Wang, Z.Y. Xu, CycleGAN-based singing\/humming to instrument conversion technique. Electronics 11(11), 1724 (2022). https:\/\/doi.org\/10.3390\/electronics11111724","journal-title":"Electronics"},{"key":"3238_CR22","doi-asserted-by":"publisher","unstructured":"S.H. Lee, H.R. Noh, W.J. Nam, S.W. Lee, Duration controllable voice conversion via phoneme-based information bottleneck. IEEE\/ACM Trans. Audio, Speech Lang. Process., 30, 1173\u20131183 (2022). https:\/\/doi.org\/10.1109\/TASLP.2022.3156757","DOI":"10.1109\/TASLP.2022.3156757"},{"key":"3238_CR23","doi-asserted-by":"publisher","first-page":"27278","DOI":"10.1109\/ACCESS.2021.3058382","volume":"9","author":"YK Lee","year":"2021","unstructured":"Y.K. Lee, H.W. Kim, J.G. Park, Many-to-many unsupervised speech conversion from nonparallel corpora. IEEE Access 9, 27278\u201327286 (2021). https:\/\/doi.org\/10.1109\/ACCESS.2021.3058382","journal-title":"IEEE Access"},{"key":"3238_CR24","doi-asserted-by":"publisher","unstructured":"Y.A. Li, A. Zare, N. Mesgarani, Starganv2-VC: A Diverse, Unsupervised, Non-parallel framework for natural-sounding voice conversion. arXiv:2107.10394 (2021). https:\/\/doi.org\/10.48550\/arXiv.2107.10394","DOI":"10.48550\/arXiv.2107.10394"},{"key":"3238_CR25","doi-asserted-by":"publisher","unstructured":"S. Liu, Y. Cao, D. Wang, X. Wu, X. Liu, H. Meng, Any-to-many voice conversion with location-relative sequence-to-sequence modeling. IEEE\/ACM Trans. Audio, Speech Lang. Process., 29, 1717\u20131728 (2021). https:\/\/doi.org\/10.1109\/TASLP.2021.3076867","DOI":"10.1109\/TASLP.2021.3076867"},{"key":"3238_CR26","doi-asserted-by":"publisher","unstructured":"C.C. Lo, S.W. Fu, W.C. Huang, X. Wang, J. Yamagishi, Y. Tsao, H.M. Wang, Mosnet: DL-based objective assessment for voice conversion. arXiv:1904.08352 (2019). https:\/\/doi.org\/10.48550\/arXiv.1904.08352","DOI":"10.48550\/arXiv.1904.08352"},{"issue":"23","key":"3238_CR27","doi-asserted-by":"publisher","first-page":"2159","DOI":"10.3390\/app122312159","volume":"12","author":"AH Meftah","year":"2022","unstructured":"A.H. Meftah, Y.A. Alotaibi, S.A. Selouani, Arabic Emotional voice conversion using english pre-trained StarGANv2-VC-based model. Appl. Sci. 12(23), 2159 (2022). https:\/\/doi.org\/10.3390\/app122312159","journal-title":"Appl. Sci."},{"key":"3238_CR28","doi-asserted-by":"publisher","first-page":"67835","DOI":"10.1109\/ACCESS.2023.3292003","volume":"11","author":"AH Meftah","year":"2023","unstructured":"A.H. Meftah, A.A. Alashban, Y.A. Alotaibi, S.A. Selouani, English emotional voice conversion using StarGAN model. IEEE Access 11, 67835\u201367849 (2023). https:\/\/doi.org\/10.1109\/ACCESS.2023.3292003","journal-title":"IEEE Access"},{"key":"3238_CR29","doi-asserted-by":"publisher","unstructured":"B. Nguyen, F. Cardinaux, NVC-Net: End-to-end adversarial voice conversion. In IEEE ICASSP, pp. 7012\u20137016 (2022). https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747020","DOI":"10.1109\/ICASSP43922.2022.9747020"},{"key":"3238_CR30","doi-asserted-by":"publisher","unstructured":"H. J. Park, S. W. Yang, J. S. Kim, W. Shin, S. W. Han, TriAAN-VC: Triple adaptive attention normalization for any-to-any voice conversion. In IEEE Int. Conf. on Acoustics, Speech Signal Processing, pp. 1\u20135 (2023). https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10096642","DOI":"10.1109\/ICASSP49357.2023.10096642"},{"key":"3238_CR31","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1117\/12.2659719","volume":"12456","author":"X Qiu","year":"2022","unstructured":"X. Qiu, Y. Luo, Research on synthesis of designated speaker speech based on StarGAN-VC model. In ICAIIP 12456, 256\u2013263 (2022). https:\/\/doi.org\/10.1117\/12.2659719","journal-title":"In ICAIIP"},{"key":"3238_CR32","doi-asserted-by":"publisher","unstructured":"D. Ronssin, M. Cernak, AC-VC: Non-parallel low latency phonetic posteriorgrams based voice conversion. Proc. IEEE ASRU, pp. 710\u2013716 (2021). https:\/\/doi.org\/10.1109\/ASRU51503.2021.9688277","DOI":"10.1109\/ASRU51503.2021.9688277"},{"key":"3238_CR33","doi-asserted-by":"publisher","unstructured":"N. Shah, M. Singh, N. Takahashi, N. Onoe, Nonparallel emotional voice conversion for unseen speaker-emotion pairs using dual domain adversarial network & virtual domain pairing. In IEEE Int. Conf. on Acoustics, Speech and Signal Processing, pp. 1\u20135 (2023). https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10095842","DOI":"10.1109\/ICASSP49357.2023.10095842"},{"key":"3238_CR34","doi-asserted-by":"publisher","unstructured":"B. Sisman, J. Yamagishi, S. King, H. Li, An overview of voice conversion and its challenges: from statistical modeling to deep learning. IEEE\/ACM Trans. Audio, Speech Lang. Process., 29, 132\u2013157 (2020). https:\/\/doi.org\/10.1109\/TASLP.2020.3038524","DOI":"10.1109\/TASLP.2020.3038524"},{"key":"3238_CR35","doi-asserted-by":"publisher","unstructured":"S. Si, J. Wang, X. Zhang, X. Qu, N. Cheng, J. Xiao, Boosting StarGANs for voice conversion with contrastive discriminator. In ICONIP, pp. 355\u2013366 (2022). https:\/\/doi.org\/10.1007\/978-3-031-30108-7_30","DOI":"10.1007\/978-3-031-30108-7_30"},{"key":"3238_CR36","doi-asserted-by":"publisher","unstructured":"H. Tang, X. Zhang, J. Wang, N. Cheng, J. Xiao, AVQVC: One-shot voice conversion by vector quantization with applying contrastive learning. In IEEE ICASSP, pp. 4613\u20134617 (2022). https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9746369","DOI":"10.1109\/ICASSP43922.2022.9746369"},{"key":"3238_CR37","doi-asserted-by":"publisher","unstructured":"Y. Wang, D. Li, R. Zhang, F. Li, Z. Wang, A Non-Parallel many-to-many speech conversion method based on STARGAN Model. In IEEE ICNCIT, pp. 12\u201315 (2022). https:\/\/doi.org\/10.1109\/NetCIT57419.2022.00010","DOI":"10.1109\/NetCIT57419.2022.00010"},{"key":"3238_CR38","doi-asserted-by":"publisher","unstructured":"R. Yamamoto, E. Song, J. M. Kim, Parallel WaveGAN: Fast waveform generation via GANs with multi-resolution spectrogram. In IEEE Int. Conf. on Acoustics, Speech and Signal Processing, (2020). https:\/\/doi.org\/10.1109\/ICASSP40776.202","DOI":"10.1109\/ICASSP40776.202"},{"key":"3238_CR39","doi-asserted-by":"publisher","unstructured":"S. Yan, S. Chen, Y. Xu, D. Ke, MaskMel-Prosody-CycleGAN-VC: High-Quality Cross-Lingual Voice Conversion. In Proc. ICAIRC, pp. 19\u201326 (2023). https:\/\/doi.org\/10.1007\/978-981-97-2200-6_2","DOI":"10.1007\/978-981-97-2200-6_2"},{"key":"3238_CR40","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1016\/j.specom.2020.05.004","volume":"122","author":"M Zhang","year":"2020","unstructured":"M. Zhang, B. Sisman, L. Zhao, H. Li, Deepconversion: voice conversion with limited parallel training data. Speech Commun. 122, 31\u201343 (2020). https:\/\/doi.org\/10.1016\/j.specom.2020.05.004","journal-title":"Speech Commun."},{"key":"3238_CR41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-46674-8_11","author":"X Zhang","year":"2023","unstructured":"X. Zhang, J. Wang, N. Cheng, J. Xiao, Voice conversion with denoising diffusion probabilistic GAN models. In Int. Conf. Adv. Data Mining Appl. (2023). https:\/\/doi.org\/10.1007\/978-3-031-46674-8_11","journal-title":"In Int. Conf. Adv. Data Mining Appl."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-025-03238-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-025-03238-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-025-03238-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,30]],"date-time":"2025-11-30T03:29:05Z","timestamp":1764473345000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-025-03238-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,29]]},"references-count":41,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["3238"],"URL":"https:\/\/doi.org\/10.1007\/s00034-025-03238-y","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"type":"print","value":"0278-081X"},{"type":"electronic","value":"1531-5878"}],"subject":[],"published":{"date-parts":[[2025,7,29]]},"assertion":[{"value":"14 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 June 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 June 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 July 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}