{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T15:11:50Z","timestamp":1774624310609,"version":"3.50.1"},"publisher-location":"New York, New York, USA","reference-count":16,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1145\/3368926.3369662","type":"proceedings-article","created":{"date-parts":[[2019,12,20]],"date-time":"2019-12-20T13:30:11Z","timestamp":1576848611000},"page":"9-14","source":"Crossref","is-referenced-by-count":3,"title":["Emotional Speech Generator by using Generative Adversarial Networks"],"prefix":"10.1145","author":[{"given":"Takuya","family":"Asakura","sequence":"first","affiliation":[{"name":"Advanced course of Electronics, Tokyo Metropolitan College of Industrial Technology, Arakawa-ward, Tokyo, Japan"}]},{"given":"Shunsuke","family":"Akama","sequence":"additional","affiliation":[{"name":"Advanced course of Electronics Tokyo Metropolitan College of Industrial Technology Arakawa-ward, Tokyo, Japan"}]},{"given":"Eri","family":"Shimokawara","sequence":"additional","affiliation":[{"name":"Graduate school of Information Science, Tokyo Metropolitan University Hino-city, Tokyo, Japan"}]},{"given":"Toru","family":"Yamaguchi","sequence":"additional","affiliation":[{"name":"Graduate school of Information Science, Tokyo Metropolitan University Hino-city, Tokyo, Japan"}]},{"given":"Shoji","family":"Yamamoto","sequence":"additional","affiliation":[{"name":"Advanced course of Electronics, Tokyo Metropolitan College of Industrial Technology Arakawa-ward, Tokyo, 
Japan"}]}],"member":"320","reference":[{"key":"key-10.1145\/3368926.3369662-1","unstructured":"A. Oord, S. Dieleman, H. Zen, K. Simonyan, O. Vinyals, A. Graves, N. Kalchbrenner, A. Senior, and K. Kavukcuoglu, \"Wavenet: A generative model for raw audio,\" arXiv: 1609.03499, 2016."},{"key":"key-10.1145\/3368926.3369662-2","unstructured":"A. Oord, Y. Li, I. Babuschkin, K. Simonyan, O. Vinyals, K. Kavukcuoglu, G. Driessche, E. Lockhart, L. C. Cobo, F. Stimberg, N. Casagrande, D. Grewe, S. Noury, S. Dieleman, E. Elsen, N. Kalchbrenner, H. Zen, A. Graves, H. King, T. Walters, D. Belov, and D. Hassabis, \"Parallel WaveNet: Fast High-Fidelity Speech Synthesis,\" arXiv: 1711.10433, 2017."},{"key":"key-10.1145\/3368926.3369662-3","unstructured":"J. Shen, R. Pang, R. J. Weiss, M. Schuster, N. Jaitly, Z. Yang, Z. Chen, Y. Zhang, Y. Wang, RJ Skerry-Ryan, R. A. Saurous, Y. Agiomyrgiannakis, and Y. Wu, \"Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions,\" arXiv: 1712.05884, 2017."},{"key":"key-10.1145\/3368926.3369662-4","doi-asserted-by":"crossref","unstructured":"T. Kaneko and H. Kameoka, \"Parallel-Data-Free Voice Conversion Using Cycle-Consistent Adversarial Networks,\" EUSIPCO, 2018, 2100--2104.","DOI":"10.23919\/EUSIPCO.2018.8553236"},{"key":"key-10.1145\/3368926.3369662-5","unstructured":"H. Kameoka, T. Kaneko, K. Tanaka, and N. Hojo, \"StarGAN-VC: Non-parallel many-to-many voice conversion with star generative adversarial networks,\" arXiv: 1806.02169, 2018."},{"key":"key-10.1145\/3368926.3369662-6","doi-asserted-by":"crossref","unstructured":"J.-Y. Zhu, T. Park, P. Isola, and A. A. Efros, \"Unpaired image-to-image translation using cycle-consistent adversarial networks,\" ICCV, 2017, 2223--2232.","DOI":"10.1109\/ICCV.2017.244"},{"key":"key-10.1145\/3368926.3369662-7","unstructured":"I. J. Goodfellow, J. Pouget-Abadie, M. Mirza, B. Xu, D. Warde-Farley, S. Ozair, A. Courville, and Y. 
Bengio, \"Generative adversarial nets,\" NIPS, 2014, 2672--2680."},{"key":"key-10.1145\/3368926.3369662-8","unstructured":"Y. N. Dauphin, A. Fan, M. Auli, and D. Grangier, \"Language modeling with gated convolutional networks,\" ICML, 2017, 933--941."},{"key":"key-10.1145\/3368926.3369662-9","doi-asserted-by":"crossref","unstructured":"K. He, X. Zhang, S. Ren and J. Sun, \"Deep Residual Learning for Image Recognition,\" CVPR, 2016, 770--778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"key-10.1145\/3368926.3369662-10","doi-asserted-by":"crossref","unstructured":"Y. Wu and K. He, \"Group Normalization,\" ECCV, 2018, 3--19.","DOI":"10.1007\/978-3-030-01261-8_1"},{"key":"key-10.1145\/3368926.3369662-11","unstructured":"X. Huang and S. Belongie, \"Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization\", arXiv:1703.06868, 2017."},{"key":"key-10.1145\/3368926.3369662-12","doi-asserted-by":"crossref","unstructured":"W. Shi, J. Caballero, F. Huszár, J. Totz, A. P. Aitken, R. Bishop, D. Rueckert, and Z. Wang, \"Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network,\" CVPR, 2016, 1874--1883.","DOI":"10.1109\/CVPR.2016.207"},{"key":"key-10.1145\/3368926.3369662-13","doi-asserted-by":"crossref","unstructured":"C. Busso, M. Bulut, C.C. Lee, A. Kazemzadeh, E. Mower, S. Kim, J.N. Chang, S. Lee, and S. Narayanan, \"IEMOCAP: Interactive emotional dyadic motion capture database,\" Journal of Language Resources and Evaluation, Vol. 42, No. 4, pp. 335--359, 2008.","DOI":"10.1007\/s10579-008-9076-6"},{"key":"key-10.1145\/3368926.3369662-14","unstructured":"M. Morise, F. Yokomori, and K. Ozawa, \"WORLD: a vocoder-based high-quality speech synthesis system for real-time applications,\" IEICE transactions on information and systems, Vol. E99-D, No. 7, pp. 1877--1884, 2016."},{"key":"key-10.1145\/3368926.3369662-15","doi-asserted-by":"crossref","unstructured":"K. Liu, J. Zhang, and Y. 
Yan, \"High quality voice conversion through phoneme-based linear mapping functions with STRAIGHT for Mandarin,\" FSKD, 2007, 410--414.","DOI":"10.1109\/FSKD.2007.347"},{"key":"key-10.1145\/3368926.3369662-16","unstructured":"D. P. Kingma and J. Ba, \"Adam: A method for stochastic optimization,\" arXiv:1412.6980, 2015."}],"event":{"name":"the Tenth International Symposium","location":"Hanoi, Ha Long Bay, Viet Nam","acronym":"SoICT 2019","number":"10","sponsor":["SOICT, School of Information and Communication Technology - HUST","NAFOSTED, The National Foundation for Science and Technology Development"],"start":{"date-parts":[[2019,12,4]]},"end":{"date-parts":[[2019,12,6]]}},"container-title":["Proceedings of the Tenth International Symposium on Information and Communication Technology  - SoICT 2019"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3368926.3369662","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3369662&ftid=2101249&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:44:50Z","timestamp":1750203890000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3368926.3369662"}},"subtitle":[],"proceedings-subject":"Information and Communication Technology","short-title":[],"issued":{"date-parts":[[2019]]},"references-count":16,"URL":"https:\/\/doi.org\/10.1145\/3368926.3369662","relation":{},"subject":[],"published":{"date-parts":[[2019]]}}}