{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,10]],"date-time":"2025-11-10T15:04:44Z","timestamp":1762787084112,"version":"build-2065373602"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:00:00Z","timestamp":1759881600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:00:00Z","timestamp":1759881600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s10772-025-10205-0","type":"journal-article","created":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T14:38:15Z","timestamp":1759934295000},"page":"837-849","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Artificial intelligence driven gender based text-to-speech systems(TTS) using deep learning algorithms"],"prefix":"10.1007","volume":"28","author":[{"given":"A.","family":"Siva Kumar Reddy","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bechoo","family":"Lal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"K.","family":"Arun Bhaskar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"M.","family":"Bhaskar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ashish","family":"Ashish","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dineshwari","family":"Bisen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,8]]},"reference":[{"key":"10205_CR7","unstructured":"Anari, Z., Hatamlou, A., Anari, B., & Masdari, M. (2020). Optimizing membership functions using learning automata for fuzzy association rule mining. Journal of AI and Data Mining, 8(4), 491\u2013514."},{"issue":"3","key":"10205_CR1","first-page":"391","volume":"8","author":"V. Ghasemi","year":"2020","unstructured":"Ghasemi, V., Javadian, M., & Bagheri Shouraki, S. (2020). High-dimensional unsupervised active learning method. Journal of AI and Data Mining, 8(3), 391\u2013407.","journal-title":"Journal of AI and Data Mining"},{"issue":"1","key":"10205_CR2","first-page":"1","volume":"7","author":"S. M. Ghazali","year":"2019","unstructured":"Ghazali, S. M., & Baleghi, Y. (2019). Pedestrian detection in infrared outdoor images based on atmospheric situation estimation. Journal of AI and Data Mining, 7(1), 1\u201316.","journal-title":"Journal of AI and Data Mining"},{"key":"10205_CR3","doi-asserted-by":"crossref","unstructured":"Gopi, A., Sajini, T., & Bhadran, V. K. (2013). Implementation of Malayalam text to speech using concatenative based TTS for android platform. In Proceedings of the international conference on control  communication and computing, December 2013.","DOI":"10.1109\/ICCC.2013.6731647"},{"key":"10205_CR39","doi-asserted-by":"crossref","unstructured":"Huang, S.-F., et al. (2021). Meta\u2011TTS: Meta\u2011learning for few-shot speaker-adaptive text\u2011to\u2011speech. arXiv.","DOI":"10.1109\/TASLP.2022.3167258"},{"issue":"5","key":"10205_CR4","first-page":"140","volume":"6","author":"A. Indumathi","year":"2012","unstructured":"Indumathi, A., & Chandra, E. (2012). Survey on speech synthesis. Signal Processing: An International Journal, 6(5), 140.","journal-title":"Signal Processing: An International Journal (SPIJ)"},{"key":"10205_CR40","doi-asserted-by":"crossref","unstructured":"Jeong, M., Kim, H., Cheon, S.\u202fJ., Choi, B.\u202fJ., & Kim, N.\u202fS. (2021). Diff\u2011TTS: A denoising diffusion model for text\u2011to\u2011speech. Interspeech 2021.","DOI":"10.21437\/Interspeech.2021-469"},{"issue":"4","key":"10205_CR5","first-page":"132","volume":"3","author":"R. Jayaraman","year":"2014","unstructured":"Jayaraman, R., Vasanthi, G., & Ramaratnam, M. S. (2014). A study on investors behavior towards equity and mutual funds. Global Journal of Commerce and Perspective, 3(4), 132\u2013136.","journal-title":"Global Journal of Commerce and Perspective"},{"key":"10205_CR6","unstructured":"John, S., & Chattopadhyay, P. (2015). Factors impacting leadership effectiveness: A literature review. Arabian Journal of Business & Management Review , 2223\u20135833."},{"key":"10205_CR8","doi-asserted-by":"crossref","unstructured":"Juvela, L., Bollepalli, B., Yamagishi, J., & Alku, P. (2019). Waveform generation for text-to-speech synthesis using pitch-synchronous multiscale generative adversarial networks. In Proceedings of the IEEE international conference on acoustics, speech and signal processing (pp. 6915\u20136919.","DOI":"10.1109\/ICASSP.2019.8683271"},{"key":"10205_CR9","doi-asserted-by":"crossref","unstructured":"Kaneko, T., Kameoka, H., Hojo, N., Ijima, Y., Hiramatsu, K., & Kashino, K. (2017). Generative adversarial network-based postfilter for statistical parametric speech synthesis. In Proceedings of the IEEE international conference on acoustics, speech and signal processing (pp. 4910\u20134914.","DOI":"10.1109\/ICASSP.2017.7953090"},{"key":"10205_CR41","doi-asserted-by":"publisher","unstructured":"Kim, J., Kim, S., Kong, J., & Yoon, S. (2020). Glow\u2011TTS: A generative flow for text\u2011to\u2011speech via monotonic alignment search. In Advances in neural information processing systems (NeurIPS 2020), December. https:\/\/doi.org\/10.5555\/3495724.3496400","DOI":"10.5555\/3495724.3496400"},{"key":"10205_CR36","doi-asserted-by":"publisher","unstructured":"Kong, J., Kim, J., Bae, J., et al. (2019). HiFi\u2011GAN: Generative adversarial networks for efficient and high-fidelity speech synthesis. arXiv. https:\/\/doi.org\/10.48550\/arXiv.2010.05646","DOI":"10.48550\/arXiv.2010.05646"},{"key":"10205_CR10","doi-asserted-by":"crossref","unstructured":"Li, N., et al. (2019). Neural speech synthesis with transformer network. Proceedings of the AAAI Conference on Artificial Intelligence, 33(1).","DOI":"10.1609\/aaai.v33i01.33016706"},{"key":"10205_CR11","doi-asserted-by":"crossref","unstructured":"Li, Y., Qin, D., & Zhang, J. (2021). Speech synthesis method based on Tacotron2. In Proceedings of the 13th international conference on advanced computational intelligence (pp. 94\u201399).","DOI":"10.1109\/ICACI52617.2021.9435882"},{"key":"10205_CR12","unstructured":"Lin, S., Su, W., Meng, L., Xie, F., Li, X., & Lu, L. (2021). arXiv preprint arXiv: 2109.13673."},{"key":"10205_CR13","doi-asserted-by":"crossref","unstructured":"Ling, Z. H., Deng, L., & Yu, D. (2013a). Modeling spectral envelopes using restricted Boltzmann machines for statistical parametric speech synthesis. In Proceedings of the IEEE international conference on acoustics, speech and signal processing (pp. 7825\u20137829).","DOI":"10.1109\/ICASSP.2013.6639187"},{"issue":"10","key":"10205_CR14","doi-asserted-by":"publisher","first-page":"2129","DOI":"10.1109\/TASL.2013.2269291","volume":"21","author":"Z. H. Ling","year":"2013","unstructured":"Ling, Z. H., Deng, L., & Yu, D. (2013b). Modeling spectral envelopes using restricted Boltzmann machines and deep belief networks for statistical parametric speech synthesis. IEEE Transactions on Audio, Speech, and Language Processing, 21(10), 2129\u20132139.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"10205_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.bdr.2017.06.003","volume":"10","author":"B. A. Milani","year":"2017","unstructured":"Milani, B. A., & Navimipour, N. J. (2017). A systematic literature review of the data replication techniques in the cloud environments. Big Data Research, 10, 1\u20137.","journal-title":"Big Data Research"},{"issue":"12","key":"10205_CR16","first-page":"1","volume":"4","author":"S. Modi","year":"2015","unstructured":"Modi, S. (2015). A study on investors\u2019 preference towards equity in Ahmedabad. International Journal of Innovative Research & Studies, 4(12), 1\u201320.","journal-title":"International Journal of Innovative Research & Studies"},{"issue":"1","key":"10205_CR17","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/TASL.2011.2109382","volume":"20","author":"A. R. Mohamed","year":"2011","unstructured":"Mohamed, A. R., Dahl, G. E., & Hinton, G. (2011). Acoustic modeling using deep belief networks. IEEE Transactions on Audio, Speech, and Language Processing, 20(1), 14\u201322.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"4","key":"10205_CR18","first-page":"461","volume":"8","author":"R. Mohammadian Fini","year":"2020","unstructured":"Mohammadian Fini, R., Mahlouji, M., & Shahidinejad, A. (2020). Multi-view face detection in open environments using Gabor features and neural networks. Journal of AI and Data Mining, 8(4), 461\u2013470.","journal-title":"Journal of AI and Data Mining"},{"issue":"3","key":"10205_CR19","first-page":"393","volume":"7","author":"M. Moradi","year":"2019","unstructured":"Moradi, M., & Hamidzadeh, J. (2019). Ensemble-based top-k recommender system considering incomplete data. Journal of AI and Data Mining, 7(3), 393\u2013402.","journal-title":"Journal of AI and Data Mining"},{"issue":"2","key":"10205_CR20","first-page":"165","volume":"2","author":"H. M. Rakesh","year":"2014","unstructured":"Rakesh, H. M. (2014). A study on individuals investors behavior in stock markets of India. IJMSS, 2(2), 165\u2013174.","journal-title":"IJMSS"},{"key":"10205_CR21","unstructured":"Ravi, D. J., & Patilkulkarni, S. (2021). Text-to-speech synthesis system for kannada language. International Journal of Advanced Research in Computer Science, 2(1), 298\u2013304."},{"key":"10205_CR22","doi-asserted-by":"crossref","unstructured":"Rebai, I., & Ben Ayed, Y. (2013, June). Arabic text-to-speech synthesis based on neural networks for MFCC estimation. In Proceedings of the world congress on computer and information technology (pp. 1\u20135).","DOI":"10.1109\/WCCIT.2013.6618665"},{"key":"10205_CR38","doi-asserted-by":"publisher","unstructured":"Ren, Y., Ruan, Y., Tan, X., Qin, T., Zhao, S., Liu, T.-Y., et al. (2019a). FastSpeech: Fast, robust and controllable text to speech. https:\/\/doi.org\/10.48550\/arXiv.1905.09263","DOI":"10.48550\/arXiv.1905.09263"},{"key":"10205_CR24","unstructured":"Ren, Y., Tan, X., Qin, T., Zhao, S., Zhao, Z., & Liu, T. Y. (2019b). Almost unsupervised text-to-speech and automatic speech recognition. In Proceedings of the international conference on machine learning (pp. 5410\u20135419)."},{"key":"10205_CR25","unstructured":"Rizvi, R., & Abrar, A. (2015). Factors affecting an individual investor behavior: An empirical study in twin cities (Rawalpindi and Islamabad) of Pakistan. SS International Journal of Economics and Management, 5(5), 1\u201327."},{"key":"10205_CR26","unstructured":"Rodriguez, J., & Walters, K. (2014). The importance of training and development in employee performance and evaluation. World Wide Journal of Multidisciplinary Research and Development, 2454\u20136615."},{"key":"10205_CR27","doi-asserted-by":"crossref","unstructured":"Saito, Y., Takamichi, S., & Saruwatari, H. (2017). Statistical parametric speech synthesis incorporating generative adversarial networks. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 26(1), 84\u201396.","DOI":"10.1109\/TASLP.2017.2761547"},{"key":"10205_CR28","unstructured":"Tan, X., Ren, Y., He, J., Zhou, Z., & Qin, T. (2022). arXiv preprint arXiv: 2205.04421."},{"key":"10205_CR29","unstructured":"Thu, C. S. T., & Zin, T. (2020). Implementation of text to speech conversion. International Journal of Engineering Research & Technology, 3(3)."},{"key":"10205_CR30","doi-asserted-by":"crossref","unstructured":"Toma, S. A., Tarsa, G. I., Oancea, E., Munteanu, D. P., Totir, F., & Anton, L. (2010). A TD-PSOLA based method for speech synthesis and compression. In Proceedings of the 8th international conference on communications (pp. 241\u2013250).","DOI":"10.1109\/ICCOMM.2010.5509044"},{"key":"10205_CR31","unstructured":"Van den Oord, A., Dieleman, S., Zen, H., Simonyan, K., Vinyals, O., Graves, A., Kalchbrenner, N., Senior, A., & Kavukcuoglu, K. (2016). arXiv preprint arXiv: 1609.03499."},{"key":"10205_CR32","doi-asserted-by":"publisher","unstructured":"Wagner, D., Bayerl, S. P., Cordourier Maruri, H. A., & Bocklet, T. (2022). Generative models for improved naturalness, intelligibility, and voicing of whispered speech. In Proceedings of the 2022 IEEE spoken language technology workshop (SLT) (pp. 943\u2013948). https:\/\/doi.org\/10.1109\/SLT54892.2023.10022796","DOI":"10.1109\/SLT54892.2023.10022796"},{"key":"10205_CR33","unstructured":"Wang, G. (2019). arXiv preprint arXiv: 1903.05955."},{"key":"10205_CR34","doi-asserted-by":"crossref","unstructured":"Wang, X., Takaki, S., & Yamagishi, J. (2017). An RNN-based quantized F0 model with multi-tier feedback links for text-to-speech synthesis. In Proceedings of INTERSPEECH (pp. 1059\u20131063).","DOI":"10.21437\/Interspeech.2017-246"},{"key":"10205_CR35","unstructured":"Watts, O., Stan, A., Clark, R. A., Mamiya, Y., Giurgiu, M., Yamagishi, J., & King, S. (2013). Unsupervised and lightly-supervised learning for rapid construction of TTS systems in multiple languages from found data: Evaluation and analysis. In Proceedings of the eighth ISCA workshop on speech synthesis."}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10205-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-025-10205-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10205-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,10]],"date-time":"2025-11-10T15:00:23Z","timestamp":1762786823000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-025-10205-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,8]]},"references-count":39,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["10205"],"URL":"https:\/\/doi.org\/10.1007\/s10772-025-10205-0","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2025,10,8]]},"assertion":[{"value":"28 October 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 July 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}