{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T12:47:48Z","timestamp":1780922868956,"version":"3.54.1"},"reference-count":83,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"1","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2021,1,1]]},"DOI":"10.1587\/transinf.2020mui0002","type":"journal-article","created":{"date-parts":[[2020,12,31]],"date-time":"2020-12-31T22:30:38Z","timestamp":1609453838000},"page":"12-23","source":"Crossref","is-referenced-by-count":7,"title":["Generation and Detection of Media Clones"],"prefix":"10.1587","volume":"E104.D","author":[{"given":"Isao","family":"ECHIZEN","sequence":"first","affiliation":[{"name":"National Institute of Informatics"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Noboru","family":"BABAGUCHI","sequence":"additional","affiliation":[{"name":"Osaka University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Junichi","family":"YAMAGISHI","sequence":"additional","affiliation":[{"name":"National Institute of Informatics"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Naoko","family":"NITTA","sequence":"additional","affiliation":[{"name":"Osaka University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuta","family":"NAKASHIMA","sequence":"additional","affiliation":[{"name":"Osaka University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kazuaki","family":"NAKAMURA","sequence":"additional","affiliation":[{"name":"Osaka University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kazuhiro","family":"KONO","sequence":"additional","affiliation":[{"name":"Kansai University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fuming","family":"FANG","sequence":"additional","affiliation":[{"name":"National Institute of Informatics"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Seiko","family":"MYOJIN","sequence":"additional","affiliation":[{"name":"Osaka University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhenzhong","family":"KUANG","sequence":"additional","affiliation":[{"name":"Osaka University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Huy H.","family":"NGUYEN","sequence":"additional","affiliation":[{"name":"National Institute of Informatics"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ngoc-Dung T.","family":"TIEU","sequence":"additional","affiliation":[{"name":"National Institute of Informatics"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"532","reference":[{"key":"1","unstructured":"[1] \u201cCommunication system for defending against attacks of media clones.\u201d http:\/\/www2c.comm.eng.osaka-u.ac.jp\/proj\/mc\/eindex.html, 2020."},{"key":"2","doi-asserted-by":"publisher","unstructured":"[2] N. Babaguchi, I. Echizen, J. Yamagishi, N. Nitta, Y. Nakashima, K. Nakamura, K. Kono, F. Fang, S. Myojin, Z. Kuang, H.H. Nguyen, and N.D.T. Tieu, \u201cPreventing fake information generation against media clone attacks,\u201d IEICE Trans. Inf. &amp; Syst., vol.E104-D, no.1, pp.2-11, Jan. 2021. 10.1587\/transinf.2020MUI0001","DOI":"10.1587\/transinf.2020MUI0001"},{"key":"3","unstructured":"[3] A. van den Oord, S. Dieleman, H. Zen, K. Simonyan, O. Vinyals, A. Graves, N. Kalchbrenner, A. Senior, and K. Kavukcuoglu, \u201cWaveNet: a generative model for raw audio,\u201d arXiv preprint arXiv:1609.03499, 2016."},{"key":"4","doi-asserted-by":"crossref","unstructured":"[4] A. Tamamori, T. Hayashi, K. Kobayashi, K. Takeda, and T. Toda, \u201cSpeaker-dependent WaveNet vocoder,\u201d Proc. Interspeech, pp.1118-1122, 2017. 10.21437\/interspeech.2017-314","DOI":"10.21437\/Interspeech.2017-314"},{"key":"5","unstructured":"[5] S.\u00d6. Arik, M. Chrzanowski, A. Coates, G. Diamos, A. Gibiansky, Y. Kang, X. Li, J. Miller, A. Ng, J. Raiman, S. Sengupta, and M. Shoeybi, \u201cDeep voice: real-time neural text-to-speech,\u201d Proc. International Conference on Machine Learning, pp.195-204, 2017."},{"key":"6","unstructured":"[6] Y. Wang, R.J. Skerry-Ryan, D. Stanton, Y. Wu, R.J. Weiss, N. Jaitly, Z. Yang, Y. Xiao, Z. Chen, S. Bengio, Q. Le, Y. Agiomyrgiannakis, R. Clark, and R.A. Saurous, \u201cTacotron: a fully end-to-end text-to-speech synthesis model,\u201d Proc. Interspeech, pp.4006-4010, 2017. 10.21437\/interspeech.2017-1452"},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] J. Shen, R. Pang, R.J. Weiss, M. Schuster, N. Jaitly, Z. Yang, Z. Chen, Y. Zhang, Y. Wang, R.J. Skerry-Ryan, R.A. Saurous, Y. Agiomygiannakis, and Y. Wu, \u201cNatural TTS synthesis by conditioning WaveNet on mel spectrogram predictions,\u201d Proc. IEEE International Conference on Acoustics, Speech and Signal Processing, pp.4779-4783, 2018. 10.1109\/icassp.2018.8461368","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"8","unstructured":"[8] S.\u00d6. Arik, J. Chen, K. Peng, W. Ping, and Z. Yanqi, \u201cNeural voice cloning with a few samples,\u201d Advances in Neural Information Processing Systems, pp.10019-10029, 2018."},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] J. Lorenzo-Trueba, J. Yamagishi, T. Toda, D. Saito, F. Villavicencio, T. Kinnunen, and Z. Ling, \u201cThe voice conversion challenge 2018: Promoting development of parallel and nonparallel methods,\u201d Proc. Odyssey 2018 The Speaker and Language Recognition Workshop, pp.195-202, 2018. 10.21437\/odyssey.2018-28","DOI":"10.21437\/Odyssey.2018-28"},{"key":"10","unstructured":"[10] R. Tolosana, R. Vera-Rodriguez, J. Fierrez, A. Morales, and J. Ortega-Garcia, \u201cDeepFakes and beyond: a survey of face manipulation and fake detection,\u201d arXiv preprint arXiv:2001.00179, 2020."},{"key":"11","doi-asserted-by":"crossref","unstructured":"[11] T. Karras, S. Laine, and T. Aila, \u201cA style-based generator architecture for generative adversarial networks,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.4401-4410, 2019. 10.1109\/cvpr.2019.00453","DOI":"10.1109\/CVPR.2019.00453"},{"key":"12","unstructured":"[12] \u201cFaceSwap.\u201d https:\/\/github.com\/MarekKowalski\/FaceSwap."},{"key":"13","unstructured":"[13] \u201cTerrifying high-tech porn: Creepy \u2018deepfake\u2019 videos are on the rise.\u201d https:\/\/www.foxnews.com\/tech\/terrifying-high-tech-porn-creepy-deepfake-videos-are-on-the-rise. Accessed: 2018-02-17."},{"key":"14","doi-asserted-by":"publisher","unstructured":"[14] H. Kim, P. Garrido, A. Tewari, W. Xu, J. Thies, N. Matthias, P. P\u00e9rez, C. Richardt, M. Zollh\u00f6fer, and C. Theobalt, \u201cDeep video portraits,\u201d ACM Transactions on Graphics, vol.37, no.4, pp.1-14, 2018. 10.1145\/3197517.3201283","DOI":"10.1145\/3197517.3201283"},{"key":"15","doi-asserted-by":"publisher","unstructured":"[15] A. Jamaludin, J.S. Chung, and A. Zisserman, \u201cYou said that?: synthesising talking faces from audio,\u201d International Journal of Computer Vision, vol.127, no.11-12, pp.1767-1779, 2019. 10.1007\/s11263-019-01150-y","DOI":"10.1007\/s11263-019-01150-y"},{"key":"16","doi-asserted-by":"publisher","unstructured":"[16] L. Liu, W. Xu, M. Zollh\u00f6fer, H. Kim, F. Bernard, M. Habermann, W. Wang, and C. Theobalt, \u201cNeural rendering and reenactment of human actor videos,\u201d ACM Transactions on Graphics, vol.38, no.5, pp.1-14, 2019. 10.1145\/3333002","DOI":"10.1145\/3333002"},{"key":"17","unstructured":"[17] I. Sutskever, J. Martens, and G. Hinton, \u201cGenerating text with recurrent neural networks,\u201d Proc. International Conference on Machine Learning, pp.1017-1024, 2011."},{"key":"18","unstructured":"[18] J. Chung, C. Gulcehre, K. Cho, and Y. Bengio, \u201cEmpirical evaluation of gated recurrent neural networks on sequence modeling,\u201d arXiv preprint arXiv:1412.3555, 2014."},{"key":"19","unstructured":"[19] A. Graves, \u201cGenerating sequences with recurrent neural networks,\u201d arXiv preprint arXiv:1308.0850, 2013."},{"key":"20","doi-asserted-by":"crossref","unstructured":"[20] A. R\u00f6ssler, D. Cozzolino, L. Verdoliva, C. Riess, J. Thies, and M. Nie\u00dfner, \u201cFaceForensics++: learning to detect manipulated facial images,\u201d Proc. International Conference on Computer Vision, pp.1-11, 2019. 10.1109\/iccv.2019.00009","DOI":"10.1109\/ICCV.2019.00009"},{"key":"21","doi-asserted-by":"crossref","unstructured":"[21] J. Thies, M. Zollh\u00f6fer, M. Stamminger, C. Theobalt, and M. Nie\u00dfner, \u201cFace2Face: Real-time face capture and reenactment of RGB videos,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, 2016. 10.1109\/cvpr.2016.262","DOI":"10.1109\/CVPR.2016.262"},{"key":"22","doi-asserted-by":"publisher","unstructured":"[22] J. Thies, M. Zollh\u00f6fer, and M. Nie\u00dfner, \u201cDeferred neural rendering: Image synthesis using neural textures,\u201d ACM Transactions on Graphics, vol.38, no.4, pp.1-12, 2019. 10.1145\/3306346.3323035","DOI":"10.1145\/3306346.3323035"},{"key":"23","unstructured":"[23] B. Dolhansky, R. Howes, B. Pflaum, N. Baram, and C.C. Ferrer, \u201cThe deepfake detection challenge (DFDC) preview dataset,\u201d arXiv preprint arXiv:1910.08854, 2019."},{"key":"24","unstructured":"[24] L. Jiang, W. Wu, R. Li, C. Qian, and C.C. Loy, \u201cDeeperForensics-1.0: a large-scale dataset for real-world face forgery detection,\u201d arXiv preprint arXiv:2001.03024, 2020."},{"key":"25","doi-asserted-by":"crossref","unstructured":"[25] R. Raghavendra, K.B. Raja, S. Venkatesh, and C. Busch, \u201cTransferable deep-CNN features for detecting digital and print-scanned morphed face images,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition Workshop, pp.1822-1830, 2017. 10.1109\/cvprw.2017.228","DOI":"10.1109\/CVPRW.2017.228"},{"key":"26","doi-asserted-by":"crossref","unstructured":"[26] N. Rahmouni, V. Nozick, J. Yamagishi, and I. Echizen, \u201cDistinguishing computer graphics from natural images using convolution neural networks,\u201d Proc. IEEE International Workshop on Information Forensics and Security, pp.1-6, 2017. 10.1109\/wifs.2017.8267647","DOI":"10.1109\/WIFS.2017.8267647"},{"key":"27","doi-asserted-by":"crossref","unstructured":"[27] D. Afchar, V. Nozick, J. Yamagishi, and I. Echizen, \u201cMesoNet: a compact facial video forgery detection network,\u201d Proc. IEEE International Workshop on Information Forensics and Security, pp.1-7, 2018. 10.1109\/wifs.2018.8630761","DOI":"10.1109\/WIFS.2018.8630761"},{"key":"28","doi-asserted-by":"publisher","unstructured":"[28] W. Quan, K. Wang, D.-M. Yan, and X. Zhang, \u201cDistinguishing between natural and computer-generated images using convolutional neural networks,\u201d IEEE Trans. Inf. Forensics Security, vol.13, no.11, pp.2772-2787, 2018. 10.1109\/tifs.2018.2834147","DOI":"10.1109\/TIFS.2018.2834147"},{"key":"29","doi-asserted-by":"crossref","unstructured":"[29] Y. Li, M.-C. Chang, H. Farid, and S. Lyu, \u201cIn ictu oculi: Exposing AI generated fake face videos by detecting eye blinking,\u201d Proc. IEEE International Workshop on Information Forensics and Security, pp.1-7, 2018. 10.1109\/wifs.2018.8630787","DOI":"10.1109\/WIFS.2018.8630787"},{"key":"30","unstructured":"[30] S. Agarwal, H. Farid, Y. Gu, M. He, K. Nagano, and H. Li, \u201cProtecting world leaders against deep fakes,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp.38-45, 2019."},{"key":"31","unstructured":"[31] E. Sabir, J. Cheng, A. Jaiswal, W. AbdAlmageed, I. Masi, and P. Natarajan, \u201cRecurrent convolutional strategies for face manipulation detection in videos,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp.80-87, 2019."},{"key":"32","doi-asserted-by":"crossref","unstructured":"[32] J. Lorenzo-Trueba, F. Fang, X. Wang, I. Echizen, J. Yamagishi, and T. Kinnunen, \u201cCan we steal your vocal identity from the internet?: Initial investigation of cloning obama&apos;s voice using gan, wavenet and low-quality found data,\u201d Proc. Odyssey 2018 The Speaker and Language Recognition Workshop, pp.240-247, 2018. 10.21437\/odyssey.2018-34","DOI":"10.21437\/Odyssey.2018-34"},{"key":"33","unstructured":"[33] R. Kumar, J. Sotelo, K. Kumar, A. de Brebisson, and Y. Bengio, \u201cObamanet: Photo-realistic lip-sync from text,\u201d NIPS 2017 Workshop on Machine Learning for Creativity and Design, 2017."},{"key":"34","doi-asserted-by":"publisher","unstructured":"[34] S. Suwajanakorn, S.M. Seitz, and I. Kemelmacher-Shlizerman, \u201cSynthesizing Obama: learning lip sync from audio,\u201d ACM Transactions on Graphics, vol.36, no.4, pp.95:1-95:13, 2017. 10.1145\/3072959.3073640","DOI":"10.1145\/3072959.3073640"},{"key":"35","doi-asserted-by":"crossref","unstructured":"[35] S. Pascual, A. Bonafonte, and J. Serr\u00e0, \u201cSEGAN: Speech enhancement generative adversarial network,\u201d Proc. Interspeech, pp.3642-3646, 2017. 10.21437\/interspeech.2017-1428","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"36","doi-asserted-by":"crossref","unstructured":"[36] X. Wang, J. Lorenzo-Trueba, S. Takaki, L. Juvela, and J. Yamagishi, \u201cA comparison of recent waveform generation and acoustic modeling methods for neural-network-based speech synthesis,\u201d Proc. IEEE International Conference on Acoustics, Speech, and Signal Processing, 2018. 10.1109\/icassp.2018.8461452","DOI":"10.1109\/ICASSP.2018.8461452"},{"key":"37","doi-asserted-by":"crossref","unstructured":"[37] E. Cooper, C.-I. Lai, Y. Yasuda, F. Fang, X. Wang, N. Chen, and J. Yamagishi, \u201cZero-shot multi-speaker text-to-speech with state-of-the-art neural speaker embeddings,\u201d Proc. IEEE International Conference on Acoustics, Speech, and Signal Processing, 2020. 10.1109\/icassp40776.2020.9054535","DOI":"10.1109\/ICASSP40776.2020.9054535"},{"key":"38","doi-asserted-by":"crossref","unstructured":"[38] F. Fang, J. Yamagishi, I. Echizen, and J. Lorenzo-Trueba, \u201cHigh-quality nonparallel voice conversion based on cycle-consistent adversarial network,\u201d Proc. IEEE International Conference on Acoustics, Speech and Signal Processing, pp.5279-5283, 2018. 10.1109\/icassp.2018.8462342","DOI":"10.1109\/ICASSP.2018.8462342"},{"key":"39","doi-asserted-by":"crossref","unstructured":"[39] J.-Y. Zhu, T. Park, P. Isola, and A.A. Efros, \u201cUnpaired image-to-image translation using cycle-consistent adversarial networks,\u201d Proc. International Conference on Computer Vision, pp.2223-2232, 2017. 10.1109\/iccv.2017.244","DOI":"10.1109\/ICCV.2017.244"},{"key":"40","unstructured":"[40] I. Goodfellow, J. Pouget-Abadie, M. Mirza, B. Xu, D. Warde-Farley, S. Ozair, A. Courville, and Y. Bengio, \u201cGenerative adversarial nets,\u201d Advances in neural information processing systems, pp.2672-2680, 2014."},{"key":"41","doi-asserted-by":"publisher","unstructured":"[41] Y. Nakashima, T. Yasui, L. Nguyen, and N. Babaguchi, \u201cSpeech-driven face reenactment for a video sequence,\u201d ITE Transactions on Media Technology and Applications, vol.8, no.1, pp.60-68, 2020. 10.3169\/mta.8.60","DOI":"10.3169\/mta.8.60"},{"key":"42","doi-asserted-by":"publisher","unstructured":"[42] Y. Liu, F. Xu, J. Chai, X. Tong, L. Wang, and Q. Huo, \u201cVideo-audio driven real-time facial animation,\u201d ACM Trans. Graphics, vol.34, no.6, pp.182:1-182:10, 2015. 10.1145\/2816795.2818122","DOI":"10.1145\/2816795.2818122"},{"key":"43","doi-asserted-by":"publisher","unstructured":"[14] H. Kim, P. Garrido, A. Tewari, W. Xu, J. Thies, N. Matthias, P. P\u00e9rez, C. Richardt, M. Zollh\u00f6fer, and C. Theobalt, \u201cDeep video portraits,\u201d ACM Transactions on Graphics, vol.37, no.4, pp.1-14, 2018. 10.1145\/3197517.3201283","DOI":"10.1145\/3197517.3201283"},{"key":"44","doi-asserted-by":"publisher","unstructured":"[44] J. Thies, M. Zollh\u00f6fer, C. Theobalt, M. Stamminger, and M. Nie\u00dfner, \u201cHeadon: Real-time reenactment of human portrait videos,\u201d ACM Transactions on Graphics, vol.37, no.4, pp.1-13, 2018. 10.1145\/3197517.3201350","DOI":"10.1145\/3197517.3201350"},{"key":"45","doi-asserted-by":"crossref","unstructured":"[45] K. Vougioukas, S. Petridis, and M. Pantic, \u201cRealistic speech-driven facial animation with GANs,\u201d International Journal of Computer Vision, 2019.","DOI":"10.1007\/s11263-019-01251-8"},{"key":"46","unstructured":"[46] V. Pratap, A. Hannun, Q. Xu, J. Cai, J. Kahn, G. Synnaeve, V. Liptchinsky, and R. Collobert, \u201cwav2letter++: The fastest open-source speech recognition system,\u201d arXiv preprint, arXiv:1812. 07625, 2018."},{"key":"47","doi-asserted-by":"crossref","unstructured":"[47] H.H. Nguyen, N.-D.T. Tieu, H.-Q. Nguyen-Son, J. Yamagishi, and I. Echizen, \u201cTransformation on computer-generated facial image to avoid detection by spoofing detector,\u201d Proc. IEEE International Conference on Multimedia and Expo, pp.1-6, 2018. 10.1109\/icme.2018.8486579","DOI":"10.1109\/ICME.2018.8486579"},{"key":"48","doi-asserted-by":"crossref","unstructured":"[48] R. Wu, X. Li, and B. Yang, \u201cIdentifying computer generated graphics via histogram features,\u201d Proc. IEEE International Conference on Image Processing, pp.1933-1936, 2011. 10.1109\/icip.2011.6115849","DOI":"10.1109\/ICIP.2011.6115849"},{"key":"49","doi-asserted-by":"publisher","unstructured":"[49] F. Peng, D.L. Zhou, M. Long, and X.M. Sun, \u201cDiscrimination of natural images and computer generated graphics based on multi-fractal and regression analysis,\u201d AEU-International Journal of Electronics and Communications, vol.71, pp.72-81, 2017. 10.1016\/j.aeue.2016.11.009","DOI":"10.1016\/j.aeue.2016.11.009"},{"key":"50","doi-asserted-by":"crossref","unstructured":"[50] H.H. Nguyen, H.-Q. Nguyen-Son, T.D. Nguyen, and I. Echizen, \u201cDiscriminating between computer-generated facial images and natural ones using smoothness property and local entropy,\u201d Proc. International Workshop on Digital Watermarking, pp.39-50, 2015. 10.1007\/978-3-319-31960-5_4","DOI":"10.1007\/978-3-319-31960-5_4"},{"key":"51","doi-asserted-by":"crossref","unstructured":"[51] L. Huang, A.D. Joseph, B. Nelson, B.I.P. Rubinstein, and J.D. Tygar, \u201cAdversarial machine learning,\u201d Proc. ACM Workshop on Security and Artificial Intelligence, pp.43-58, 2011. 10.1145\/2046684.2046692","DOI":"10.1145\/2046684.2046692"},{"key":"52","unstructured":"[52] B. Weyrauch, B. Heisele, J. Huang, and V. Blanz, \u201cComponent-based face recognition with 3d morphable models,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition Workshop, p.85, 2004. 10.1109\/cvpr.2004.315"},{"key":"53","doi-asserted-by":"crossref","unstructured":"[53] Y. Guo, L. Zhang, Y. Hu, X. He, and J. Gao, \u201cMS-Celeb-1M: A dataset and benchmark for large-scale face recognition,\u201d Proc.European conference on computer vision, pp.87-102, 2016. 10.1007\/978-3-319-46487-9_6","DOI":"10.1007\/978-3-319-46487-9_6"},{"key":"54","doi-asserted-by":"crossref","unstructured":"[54] F. Fang, X. Wang, J. Yamagishi, and I. Echizen, \u201cAudiovisual speaker conversion: jointly and simultaneously transforming facial expression and acoustic characteristics,\u201d Proc. IEEE International Conference on Acoustics, Speech and Signal Processing, pp.6795-6799, 2019. 10.1109\/icassp.2019.8683872","DOI":"10.1109\/ICASSP.2019.8683872"},{"key":"55","unstructured":"[55] K. Simonyan and A. Zisserman, \u201cVery deep convolutional networks for large-scale image recognition,\u201d 2014."},{"key":"56","doi-asserted-by":"crossref","unstructured":"[56] Z. Cao, G. Hidalgo, T. Simon, S.E. Wei, and Y. Sheikh, \u201cOpenpose: realtime multi-person 2d pose estimation using part affinity fields,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, 2018.","DOI":"10.1109\/CVPR.2017.143"},{"key":"57","doi-asserted-by":"crossref","unstructured":"[57] T. Tsutumi, K. Nakamura, S. Myojin, N. Nitta, and N. Babaguchi, \u201cTraining-free method for generating motion video clones from a still image considering self-occlusion of human body,\u201d Proc. International Conference on Image Processing, pp.509-513, 2019. 10.1109\/icip.2019.8803818","DOI":"10.1109\/ICIP.2019.8803818"},{"key":"58","doi-asserted-by":"crossref","unstructured":"[58] J.P. Lewis, M. Cordner, and N. Fong, \u201cPose space deformation: A unified approach to shape interpolation and skeleton-driven deformation,\u201d Proc. Annual Conference on Computer Graphics and Interactive Techniques, pp.165-172, 2000. 10.1145\/344779.344862","DOI":"10.1145\/344779.344862"},{"key":"59","doi-asserted-by":"crossref","unstructured":"[59] C. Chan, S. Ginosar, T. Zhou, and A.A. Efros, \u201cEverybody dance now,\u201d Proc. International Conference on Computer Vision, pp.5933-5942, 2019. 10.1109\/iccv.2019.00603","DOI":"10.1109\/ICCV.2019.00603"},{"key":"60","doi-asserted-by":"crossref","unstructured":"[60] K. Nakamura, E. Miyazaki, N. Nitta, and N. Babaguchi, \u201cGenerating handwritten character clones from an incomplete seed character set using collaborative filtering,\u201d Proc. International Conference on Frontiers in Handwriting Recognition, pp.68-73, 2018. 10.1109\/icfhr-2018.2018.00021","DOI":"10.1109\/ICFHR-2018.2018.00021"},{"key":"61","doi-asserted-by":"crossref","unstructured":"[61] D.I. Adelani, H. Mai, F. Fang, H.H. Nguyen, J. Yamagishi, and I. Echizen, \u201cGenerating sentiment-preserving fake online reviews using neural language models and their human- and machine-based detection,\u201d Proc. International Conference on Advanced Information Networking and Applications, pp.1-12, 2020. 10.1007\/978-3-030-44041-1_114","DOI":"10.1007\/978-3-030-44041-1_114"},{"key":"62","unstructured":"[62] A. Radford, J. Wu, R. Child, D. Luan, D. Amodei, and I. Sutskever, \u201cLanguage models are unsupervised multitask learners,\u201d OpenAI Blog, vol.1, no.8, p.9, 2019."},{"key":"63","unstructured":"[63] J. Devlin, M.W. Chang, K. Lee, and K. Toutanova, \u201cBert: Pre-training of deep bidirectional transformers for language understanding,\u201d arXiv preprint arXiv:1810.04805, 2018."},{"key":"64","doi-asserted-by":"crossref","unstructured":"[64] J. Lim, N. Nitta, K. Nakamura, and N. Babaguchi, \u201cGenerating spoofing tweets considering points of interest of target user,\u201d Proc. APSIPA Annual Summit and Conference, pp.1672-1678, 2019. 10.1109\/apsipaasc47483.2019.9023353","DOI":"10.1109\/APSIPAASC47483.2019.9023353"},{"key":"65","doi-asserted-by":"publisher","unstructured":"[65] Z. Wu, N. Evans, T. Kinnunen, J. Yamagishi, F. Alegre, and H. Li, \u201cSpoofing and countermeasures for speaker verification: A survey,\u201d Speech Communication, vol.66, pp.130-153, 2015. 10.1016\/j.specom.2014.10.005","DOI":"10.1016\/j.specom.2014.10.005"},{"key":"66","doi-asserted-by":"crossref","unstructured":"[66] H.H. Nguyen, J. Yamagishi, and I. Echizen, \u201cCapsule-forensics: Using capsule networks to detect forged images and videos,\u201d Proc. IEEE International Conference on Acoustics, Speech and Signal Processing, pp.2307-2311, 2019. 10.1109\/icassp.2019.8682602","DOI":"10.1109\/ICASSP.2019.8682602"},{"key":"67","doi-asserted-by":"crossref","unstructured":"[67] H.H. Nguyen, F. Fang, J. Yamagishi, and I. Echizen, \u201cMulti-task learning for detecting and segmenting manipulated facial images and videos,\u201d Proc. IEEE International Conference on Biometrics: Theory, Applications and System, 2019. 10.1109\/btas46853.2019.9185974","DOI":"10.1109\/BTAS46853.2019.9185974"},{"key":"68","unstructured":"[68] S. Sabour, N. Frosst, and G.E. Hinton, \u201cDynamic routing between capsules,\u201d Proc. Conference on Neural Information Processing Systems, 2017."},{"key":"69","unstructured":"[69] I. Chingovska, A. Anjos, and S. Marcel, \u201cOn the effectiveness of local binary patterns in face anti-spoofing,\u201d Proc. International Conference of the Biometrics Special Interest Group, 2012."},{"key":"70","unstructured":"[70] A. R\u00f6ssler, D. Cozzolino, L. Verdoliva, C. Riess, J. Thies, and M. Nie\u00dfner, \u201cFaceForensics: A large-scale video dataset for forgery detection in human faces,\u201d arXiv preprint arXiv:1803.09179, 2018."},{"key":"71","doi-asserted-by":"crossref","unstructured":"[71] K. Kono, T. Yoshida, S. Ohshiro, and N. Babaguchi, \u201cPassive video forgery detection considering spatio-temporal consistency,\u201d Proc. International Conference on Soft Computing and Pattern Recognition, pp.381-391, 2018. 10.1007\/978-3-030-17065-3_38","DOI":"10.1007\/978-3-030-17065-3_38"},{"key":"72","unstructured":"[72] N. Sowmya K and H. Chennamma, \u201cA survey on video forgery detection,\u201d arXiv preprint arXiv:1503.00843, 2015."},{"key":"73","doi-asserted-by":"crossref","unstructured":"[73] N. Hamasaki, K. Nakamura, N. Nitta, and N. Babaguchi, \u201cDiscrimination between handwritten and computer-generated texts using a distribution of patch-wise font features,\u201d Proc. APSIPA Annual Summit and Conference, pp.1665-1671, 2019. 10.1109\/apsipaasc47483.2019.9023197","DOI":"10.1109\/APSIPAASC47483.2019.9023197"},{"key":"74","unstructured":"[74] A. Graves, \u201cGenerating sequences with recurrent neural networks,\u201d arXiv preprint arXiv:1308.0850, 2014."},{"key":"75","unstructured":"[75] R. Zellers, A. Holtzman, H. Rashkin, Y. Bisk, A. Farhadi, F.Roesner, and Y. Choi, \u201cDefending against neural fake news,\u201d Advances in Neural Information Processing Systems, pp.9051-9062, 2019."},{"key":"76","doi-asserted-by":"crossref","unstructured":"[76] S. Gehrmann, H. Strobelt, and A.M. Rush, \u201cGltr: Statistical detection and visualization of generated text,\u201d Proc. Annual Meeting of the Association for Computational Linguistics, 2019. 10.18653\/v1\/p19-3019","DOI":"10.18653\/v1\/P19-3019"},{"key":"77","unstructured":"[77] I. Solaiman, M. Brundage, J. Clark, A. Askell, A. Herbert-Voss, J. Wu, A. Radford, and J. Wang, \u201cRelease strategies and the social impacts of language models,\u201d arXiv preprint arXiv:1908.09203, 2019."},{"key":"78","unstructured":"[78] Y. Liu, M. Ott, N. Goyal, J. Du, M. Joshi, D. Chen, O. Levy, M. Lewis, L. Zettlemoyer, and V. Stoyanov, \u201cRoberta: A robustly optimized bert pretraining approach,\u201d arXiv preprint arXiv:1907.11692, 2019."},{"key":"79","doi-asserted-by":"crossref","unstructured":"[79] T. Toda, L.-H. Chen, D. Saito, F. Villavicencio, M. Wester, Z. Wu, and J. Yamagishi, \u201cThe voice conversion challenge 2016,\u201d Proc.Interspeech, pp.1632-1636, 2016. 10.21437\/interspeech.2016-1066","DOI":"10.21437\/Interspeech.2016-1066"},{"key":"80","doi-asserted-by":"crossref","unstructured":"[80] T. Kinnunen, K.A. Lee, H. Delgado, N. Evans, M. Todisco, M. Sahidullah, J. Yamagishi, and D.A. Reynolds, \u201ct-dcf: a detection cost function for the tandem assessment of spoofing countermeasures and automatic speaker verification,\u201d Proc. Odyssey 2018 The Speaker and Language Recognition Workshop, pp.312-319, 2018. 10.21437\/odyssey.2018-44","DOI":"10.21437\/Odyssey.2018-44"},{"key":"81","unstructured":"[81] Z. Wu, T. Kinnunen, N. Evans, J. Yamagishi, C. Hanil\u00e7i, M.Sahidullah, and A. Sizov, \u201cASVspoof 2015: the first automatic speaker verification spoofing and countermeasures challenge,\u201d Proc. Interspeech, pp.2037-2041, 2015."},{"key":"82","doi-asserted-by":"crossref","unstructured":"[82] T. Kinnunen, M. Sahidullah, H. Delgado, M. Todisco, N. Evans, J. Yamagishi, and K.A. Lee, \u201cThe ASVspoof 2017 challenge: assessing the limits of replay spoofing attack detection,\u201d Proc. Interspeech, pp.2-6, 2017. 10.21437\/interspeech.2017-1111","DOI":"10.21437\/Interspeech.2017-1111"},{"key":"83","doi-asserted-by":"crossref","unstructured":"[83] M. Todisco, X. Wang, V. Vestman, M. Sahidullah, H. Delgado, A. Nautsch, J. Yamagishi, N. Evans, T.H. Kinnunen, and K.A. Lee, \u201cASVspoof 2019: future horizons in spoofed and fake audio detection,\u201d Proc. Interspeech, pp.1008-1012, 2019. 10.21437\/interspeech.2019-2249","DOI":"10.21437\/Interspeech.2019-2249"}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E104.D\/1\/E104.D_2020MUI0002\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,1,2]],"date-time":"2021-01-02T03:26:52Z","timestamp":1609558012000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E104.D\/1\/E104.D_2020MUI0002\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,1]]},"references-count":83,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2021]]}},"URL":"https:\/\/doi.org\/10.1587\/transinf.2020mui0002","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"value":"0916-8532","type":"print"},{"value":"1745-1361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,1,1]]}}}