{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T14:44:33Z","timestamp":1750689873738,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030638290"},{"type":"electronic","value":"9783030638306"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-63830-6_4","type":"book-chapter","created":{"date-parts":[[2020,11,18]],"date-time":"2020-11-18T10:08:18Z","timestamp":1605694098000},"page":"36-47","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Investigation of Effectively Synthesizing Code-Switched Speech Using Highly Imbalanced Mix-Lingual Data"],"prefix":"10.1007","author":[{"given":"Shaotong","family":"Guo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Longbiao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7636-3797","authenticated-orcid":false,"given":"Sheng","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ju","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cheng","family":"Gong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuguang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9237-4821","authenticated-orcid":false,"given":"Jianwu","family":"Dang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kiyoshi","family":"Honda","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,19]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Shen, J., et al.: Natural TTS synthesis by conditioningwavenet on MEL spectrogram predictions. International Conference on Acoustics. Speech and Signal Processing (ICASSP), pp. 4779\u20134783. IEEE, Calgary (2018)","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"4_CR2","unstructured":"Ping, W., Peng, K., Gibiansky, A., Arik, S.O., Kannan, A., Narang, S., Jonathan, R., Miller, J.: Deep voice 3: Scaling text-to-speech with convolutional sequence learning. In: 6th International Conference on Learning Representations (ICLR), Vancouver (2018)"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Learning to speak fluently in a foreign language: multilingual speech synthesis and cross-language voice cloning. In: 20th International Speech Communication Association (INTERSPEECH), pp. 2080\u20132084. ISCA, Graz (2019)","DOI":"10.21437\/Interspeech.2019-2668"},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Traber, C., et al.: From multilingual to polyglot speech synthesis. In: European Conference on Speech Communication and Technology, pp. 835\u2013839 (1999)","DOI":"10.21437\/Eurospeech.1999-203"},{"key":"4_CR5","unstructured":"Chu, M., Peng, H., Zhao, Y., Niu, Z., Chang, E.: Microsoft Mulan - a bilingual TTS system. In: International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. I-I. IEEE, Hong Kong (2003)"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Ming, H., Lu, Y., Zhang, Z., Dong, M.: A light-weight methodof building an LSTM-RNN-based bilingual TTS system. In: 2017 International Conference on Asian Language Processing (IALP), pp. 201\u2013205. IEEE, Singapore (2017)","DOI":"10.1109\/IALP.2017.8300579"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Sitaram, S., Rallabandi, S.K., Rijhwani, S., Black, A.W.: Experiments with cross-lingual systems for synthesis of code-mixed text. In: SSW, pp. 76\u201381 (2016)","DOI":"10.21437\/SSW.2016-13"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Cao, Y., et al.: End-to-end code-switched tts with mix of monolingual recordings. In: 2019 IEEE International Conference on Acoustics. Speech and Signal Processing (ICASSP), pp. 6935\u20136939. IEEE, Brighton (2019)","DOI":"10.1109\/ICASSP.2019.8682927"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Xue, L., Song, W., Xu, G., Xie, L., Wu, Z.: Building a mixed-lingual neural tts system with only monolingual data. In: 20th International Speech Communication Association (INTERSPEECH), pp. 2060\u20132064. ISCA, Graz (2019)","DOI":"10.21437\/Interspeech.2019-3191"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Chandu, K.R., Rallabandi, S.K., Sitaram, S., Black, A.W.: Speech synthesis for mixed-language navigation instructions. In: 18th International Speech Communication Association (INTERSPEECH), pp. 57\u201361. ISCA, Stockholm (2017)","DOI":"10.21437\/Interspeech.2017-1259"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Campbell, N.: Talking foreign-concatenative speech synthesis and the language barrier. In: 7th European Conference on Speech Communication and Technology (EUROSPEECH), pp. 337\u2013340. ISCA, Aalborg (2001)","DOI":"10.21437\/Eurospeech.2001-105"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Zen, H., Braunschweiler, N., Buchholz, S., Gales, M.J., Knill, K., Krstulovic, S., Latorre, J.: Statistical parametric speech synthesis based on speaker and language factorization. In: IEEE Transactions on Audio, Speech, and Language Processing, vol. 20, no. 6, pp. 1713\u20131724. IEEE (2012)","DOI":"10.1109\/TASL.2012.2187195"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Chen, M., et al.: Cross-lingual, multi-speaker text-to-speech synthesis using neural speaker embedding. In: 20th International Speech Communication Association (INTERSPEECH), pp. 2105\u20132109. ISCA, Graz (2019)","DOI":"10.21437\/Interspeech.2019-1632"},{"issue":"3","key":"4_CR14","first-page":"421","volume":"53","author":"J Spa","year":"2002","unstructured":"Spa, J.: Handbook of the international phonetic association. a guide to the use of the international phonetic alphabet. Word-J. Int. Ling. Assoc. 53(3), 421\u2013424 (2002)","journal-title":"Word-J. Int. Ling. Assoc."},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Qian, Y., Cao, H., Soong, F.K.: HMM-based mixed-language (Mandarin-English) speech synthesis. In: Proceedings of the 2008 6th International Symposium on Chinese Spoken Language Processing, pp. 1\u20134. IEEE, Kunming (2008)","DOI":"10.1109\/CHINSL.2008.ECP.15"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Li, S., Lu, X., Ding, C., Shen, P., Kawahara, T.: investigating radical-based end-to-end speech recognition systems for chinese dialects and japanese. In: 20th International Speech Communication Association (INTERSPEECH), pp. 2200\u20132204. ISCA, Graz (2019)","DOI":"10.21437\/Interspeech.2019-2104"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Li, B., Zen, H.: Multi-language multi-speaker acoustic modeling for lstm-rnn based statistical parametric speech synthesis. In: 17th International Speech Communication Association (INTERSPEECH), pp. 2468\u20132472. ISCA, San Francisco (2016)","DOI":"10.21437\/Interspeech.2016-172"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Yu, Q., Liu, P., Wu, Z., Ang, S.K., Meng, H., Cai, L.: Learning cross-lingual information with multilingual BLSTM for speech synthesis of low-resource languages. IEEE International Conference on Acoustics. Speech and Signal Processing (ICASSP), pp. 5545\u20135549. IEEE, Shanghai (2016)","DOI":"10.1109\/ICASSP.2016.7472738"},{"key":"4_CR19","unstructured":"Sitaram, S., Black, A.W.: Speech synthesis of code-mixed text. In: 10th International Conference on Language Resources and Evaluation (LREC), pp. 3422\u20133428. ELRA, Portoroz (2016)"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Chen, Y., Tu, T., Yeh, C., Lee, H.Y.: End-to-end text-to-speech for low-resource languages by cross-lingual transfer learning. In: 20th International Speech Communication Association (INTERSPEECH), pp. 2075\u20132079. ISCA, Graz (2019)","DOI":"10.21437\/Interspeech.2019-2730"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Mametani, K., Kato, T., Yamamoto, S.: Investigating context features hidden in End-to-End TTS. In: IEEE International Conference on Acoustics. Speech and Signal Processing (ICASSP), pp. 6920\u20136924. IEEE, Brighton (2019)","DOI":"10.1109\/ICASSP.2019.8683857"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Griffin, D.W., Lim, J.S.: Signal estimation from modified short-time fourier transform. In: IEEE International Conference on Acoustics. Speech, and Signal Processing, pp. 804\u2013807. IEEE, Boston (1983)","DOI":"10.1109\/ICASSP.1983.1172092"},{"key":"4_CR23","unstructured":"Lee, Y., Shon, S., Kim, T.: Learning pronunciation from a foreign language in speech synthesis networks. In: arXiv preprint arXiv:1811.09364, (2018)"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Chung, Y.A., Wang, Y., Hsu, W.N., Zhang, Y., Skerry-Ryan, R.J.: Semi-supervised training for improving data efficiency in end-to-end speech synthesis. In: 2019 IEEE International Conference on Acoustics. Speech and Signal Processing (ICASSP), pp. 6940\u20136944. IEEE, Brighton (2019)","DOI":"10.1109\/ICASSP.2019.8683862"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Li, B., Zhang, Y., Sainath, T., Wu, Y., Chan, W.: Bytes are all you need: End-to-end multilingual speech recognition and synthesis with bytes. In: 2019 IEEE International Conference on Acoustics. Speech and Signal Processing (ICASSP), pp. 5621\u20135625. IEEE, Brighton (2019)","DOI":"10.1109\/ICASSP.2019.8682674"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-63830-6_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,17]],"date-time":"2024-08-17T16:17:09Z","timestamp":1723911429000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-63830-6_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030638290","9783030638306"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-63830-6_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"19 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bangkok","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Thailand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 November 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.apnns.org\/ICONIP2020","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"618","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"187","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"189","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"30% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.18","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.68","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to COVID-19 pandemic the conference was held virtually.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}