{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T08:41:53Z","timestamp":1774687313132,"version":"3.50.1"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031652813","type":"print"},{"value":"9783031652820","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-65282-0_10","type":"book-chapter","created":{"date-parts":[[2024,7,24]],"date-time":"2024-07-24T20:32:04Z","timestamp":1721853124000},"page":"149-164","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Can One Model Fit All? An\u00a0Exploration of\u00a0Wav2Lip\u2019s Lip-Syncing Generalizability Across Culturally Distinct Languages"],"prefix":"10.1007","author":[{"given":"Amirkia","family":"Rafiei Oskooei","sequence":"first","affiliation":[]},{"given":"Ezgi","family":"Yahsi","sequence":"additional","affiliation":[]},{"given":"Mehmet","family":"Sungur","sequence":"additional","affiliation":[]},{"given":"Mehmet","family":"S. Aktas","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,7,25]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Toshpulatov, M., et al.: Talking human face generation: a survey. Expert Syst. Appl., 219, 119678 (2023). https:\/\/doi.org\/10.1016\/j.eswa.2023.119678","DOI":"10.1016\/j.eswa.2023.119678"},{"key":"10_CR2","doi-asserted-by":"crossref","unstructured":"A survey on the metaverse: the state-of-the-art, technologies, applications, and challenges(2023). https:\/\/doi.org\/10.1109\/jiot.2023.3278329","DOI":"10.1109\/JIOT.2023.3278329"},{"key":"10_CR3","doi-asserted-by":"publisher","unstructured":"Kato, R., et al.: Reality avatar for customer conversation in the metaverse. https:\/\/doi.org\/10.1007\/978-3-031-06509-5_10","DOI":"10.1007\/978-3-031-06509-5_10"},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Ali, I.R., et al.: Realistic lip syncing for virtual character using common viseme set. Comput. Inf. Sci. 8(3), (2015). https:\/\/doi.org\/10.5539\/CIS.V8N3P71","DOI":"10.5539\/cis.v8n3p71"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Bennett Terry, K., Radhakrishnan, R.: Detection and correction of lip-sync errors using audio and video fingerprints. SMPTE Motion Imaging J. 119(3), 42\u201352 (2010). https:\/\/doi.org\/10.5594\/J11398","DOI":"10.5594\/J11398"},{"key":"10_CR6","doi-asserted-by":"crossref","unstructured":"Fenghour, S., et al.: Deep learning-based automated lip-reading: a survey. IEEE Access. 9, 121184\u2013121205 (2021). https:\/\/doi.org\/10.1109\/ACCESS.2021.3107946","DOI":"10.1109\/ACCESS.2021.3107946"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Prajwal, K.R., et al.: A lip sync expert is all you need for speech to lip generation in the wild. 
In: Proceedings of the 28th ACM International Conference on Multimedia (2020). https:\/\/doi.org\/10.1145\/3394171.3413532","DOI":"10.1145\/3394171.3413532"},{"key":"10_CR8","doi-asserted-by":"crossref","unstructured":"Mroueh, Y., et al.: Deep multimodal learning for audio-visual speech recognition. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2015). https:\/\/doi.org\/10.1109\/ICASSP.2015.7178347","DOI":"10.1109\/ICASSP.2015.7178347"},{"key":"10_CR9","doi-asserted-by":"crossref","unstructured":"Song, H.K., et al.: Talking face generation with multilingual TTS. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.02074","DOI":"10.1109\/CVPR52688.2022.02074"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Prajwal, K.R., et al.: Towards automatic face-to-face translation. In: Proceedings of the 27th ACM International Conference on Multimedia (2019). https:\/\/doi.org\/10.1145\/3343031.3351066","DOI":"10.1145\/3343031.3351066"},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Kadam, A., et al.: A survey of audio synthesis and lip-syncing for synthetic video generation. EAI Endorsed Trans. Creative Technol. 8(28), e2 (2021). https:\/\/doi.org\/10.4108\/EAI.14-4-2021.169187","DOI":"10.4108\/eai.14-4-2021.169187"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Llorach, G., et al.: Web-based live speech-driven lip-sync. In: 2016 International Conference on Games and Virtual Worlds for Serious Applications (VS-Games) (2016). https:\/\/doi.org\/10.1109\/VS-GAMES.2016.7590381","DOI":"10.1109\/VS-GAMES.2016.7590381"},{"key":"10_CR13","unstructured":"Kumar, R., et al.: ObamaNet: photo-realistic lip-sync from text (2018). https:\/\/doi.org\/10.48550\/arXiv.1801.01442"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Jamaludin, A., et al.: You said that?: Synthesising talking faces from audio. Int. J. Comput. Vis. 127(11), 1767\u20131779 (2019). https:\/\/doi.org\/10.1007\/S11263-019-01150-Y","DOI":"10.1007\/s11263-019-01150-y"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Chung, J.S., Zisserman, A.: Out of time: automated lip sync in the wild. In: Computer Vision \u2013 ACCV 2016 Workshops. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54427-4","DOI":"10.1007\/978-3-319-54427-4"},{"key":"10_CR16","unstructured":"saifhassan\/Wav2Lip-HD. https:\/\/github.com\/saifhassan\/Wav2Lip-HD. Accessed 26 Mar 2024"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Real-ESRGAN: training real-world blind super-resolution with pure synthetic data. In: 2021 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW) (2021). https:\/\/doi.org\/10.1109\/ICCVW54120.2021.00217","DOI":"10.1109\/ICCVW54120.2021.00217"},{"key":"10_CR18","unstructured":"Wav2Lip-HR: synthesising clear high-resolution talking head in the wild. https:\/\/onlinelibrary.wiley.com\/doi\/10.1002\/cav.2226. Accessed 26 Mar 2024"},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Towards real-world blind face restoration with generative facial prior. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.00905","DOI":"10.1109\/CVPR46437.2021.00905"},{"key":"10_CR20","unstructured":"Kim, B.K., et al.: A unified compression framework for efficient speech-driven talking-face generation. arXiv:2304.00471 (2023). https:\/\/doi.org\/10.48550\/arXiv.2304.00471"},{"key":"10_CR21","unstructured":"Wang, G., et al.: Attention-based lip audio-visual synthesis for talking face generation in the wild. arXiv:2203.03984 (2022). https:\/\/doi.org\/10.48550\/arXiv.2203.03984"},{"key":"10_CR22","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: LPIPS-AttnWav2Lip: generic audio-driven lip synchronization for talking head generation in the wild. 
Speech Commun. 157, 103028 (2024). https:\/\/doi.org\/10.1016\/j.specom.2023.103028","DOI":"10.1016\/j.specom.2023.103028"},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Wang, K.C., et al.: CA-Wav2Lip: coordinate attention-based speech to lip synthesis in the wild. In: 2023 IEEE International Conference on Smart Computing (SMARTCOMP) (2023). https:\/\/doi.org\/10.1109\/SMARTCOMP58114.2023.00018","DOI":"10.1109\/SMARTCOMP58114.2023.00018"},{"key":"10_CR24","doi-asserted-by":"crossref","unstructured":"Sun, Y., et al.: Masked lip-sync prediction by audio-visual contextual exploitation in transformers. In: SIGGRAPH Asia 2022 Conference Papers (2022). https:\/\/doi.org\/10.1145\/3550469.3555393","DOI":"10.1145\/3550469.3555393"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Patel, D., et al.: Visual dubbing pipeline with localized lip-sync and two-pass identity transfer. Comput. Graph. 110 (2022). https:\/\/doi.org\/10.1016\/j.cag.2022.11.005","DOI":"10.1016\/j.cag.2022.11.005"},{"key":"10_CR26","doi-asserted-by":"crossref","unstructured":"Mukhopadhyay, S., et al.: Diff2Lip: audio conditioned diffusion models for lip-synchronization. In: 2024 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 5292\u20135302 (2024)","DOI":"10.1109\/WACV57701.2024.00521"},{"key":"10_CR27","doi-asserted-by":"crossref","unstructured":"Zhou, Y., et al.: MakeItTalk: speaker-aware talking-head animation. ACM Trans. Graph. (TOG) 39(6), 1\u201315 (2020). https:\/\/doi.org\/10.1145\/3414685.3417774","DOI":"10.1145\/3414685.3417774"},{"key":"10_CR28","doi-asserted-by":"crossref","unstructured":"Guo, Y., et al.: AD-NeRF: audio driven neural radiance fields for talking head synthesis. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.00573"},{"key":"10_CR29","unstructured":"Yao, S., et al.: DFA-NeRF: personalized talking head generation via disentangled face attributes neural rendering. arXiv:2201.00791 (2022)"},{"key":"10_CR30","doi-asserted-by":"crossref","unstructured":"Chatziagapi, A., et al.: LipNeRF: what is the right feature space to lip-sync a NeRF? In: 2023 IEEE International Conference on Automatic Face and Gesture Recognition (FG) (2023). https:\/\/doi.org\/10.1109\/FG57933.2023.10042567","DOI":"10.1109\/FG57933.2023.10042567"},{"key":"10_CR31","doi-asserted-by":"crossref","unstructured":"Bi, C., et al.: NeRF-AD: neural radiance field with attention-based disentanglement for talking face synthesis. In: 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2024)","DOI":"10.1109\/ICASSP48485.2024.10446195"},{"key":"10_CR32","doi-asserted-by":"publisher","unstructured":"Uygun, Y., et al.: On the large-scale graph data processing for user interface testing in big data science projects. In: 2020 IEEE International Conference on Big Data (Big Data), Atlanta, GA, USA, pp. 2049\u20132056 (2020). https:\/\/doi.org\/10.1109\/BigData50022.2020.9378153","DOI":"10.1109\/BigData50022.2020.9378153"},{"key":"10_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"492","DOI":"10.1007\/978-3-319-09156-3_35","volume-title":"Computational Science and Its Applications \u2013 ICCSA 2014","author":"M Kapdan","year":"2014","unstructured":"Kapdan, M., Aktas, M., Yigit, M.: On the structural code clone detection problem: a survey and software metric based approach. In: Murgante, B., et al. (eds.) ICCSA 2014. LNCS, vol. 8583, pp. 492\u2013507. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-09156-3_35"},{"key":"10_CR34","doi-asserted-by":"crossref","unstructured":"Pierce, M.E., et al.: The QuakeSim project: web services for managing geophysical data and applications. In: Tiampo, K.F., Weatherley, D.K., Weinstein, S.A. 
(eds.) Earthquakes: Simulations, Sources and Tsunamis. Pageoph Topical Volumes. Birkh\u00e4user, Basel. https:\/\/doi.org\/10.1007\/978-3-7643-8757-0_11","DOI":"10.1007\/978-3-7643-8757-0_11"},{"key":"10_CR35","doi-asserted-by":"crossref","unstructured":"Olmezogullari, E., et al.: Pattern2Vec: representation of clickstream data sequences for learning user navigational behavior. Concurrency Comput. Pract. Exper. 34(9), e6546 (2022). https:\/\/doi.org\/10.1002\/cpe.6546","DOI":"10.1002\/cpe.6546"},{"key":"10_CR36","doi-asserted-by":"publisher","unstructured":"Olmezogullari, E., et al.: Representation of click-stream data sequences for learning user navigational behavior by using embeddings. In: 2020 IEEE International Conference on Big Data (Big Data), Atlanta, GA, USA, pp. 3173\u20133179 (2020). https:\/\/doi.org\/10.1109\/BigData50022.2020.9378437","DOI":"10.1109\/BigData50022.2020.9378437"},{"key":"10_CR37","doi-asserted-by":"crossref","unstructured":"Nacar, M.A., et al.: VLab: collaborative grid services and portals to support computational material science. Concurrency Comput. Pract. Exper. 19(12), 1717\u20131728 (2007). https:\/\/doi.org\/10.1002\/cpe.1199","DOI":"10.1002\/cpe.1199"},{"key":"10_CR38","unstructured":"MediaSpeech: Multilanguage ASR Benchmark and Dataset. Accessed 30 Mar 2021"},{"key":"10_CR39","unstructured":"Persian Speech Corpus. https:\/\/fa.persianspeechcorpus.com\/. Accessed 26 Mar 2024"},{"key":"10_CR40","doi-asserted-by":"crossref","unstructured":"Chung, J.S., et al.: VoxCeleb2: deep speaker recognition. In: Interspeech 2018 (2018). https:\/\/doi.org\/10.21437\/INTERSPEECH.2018-1929","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"10_CR41","unstructured":"LRS3-TED: a large-scale dataset for visual speech recognition. Accessed 03 Sep 2018"},{"key":"10_CR42","doi-asserted-by":"crossref","unstructured":"Guan, J., et al.: StyleSync: high-fidelity generalized and personalized lip sync in style-based generator. arXiv:2305.05445 (2023). https:\/\/doi.org\/10.48550\/arXiv.2305.05445","DOI":"10.1109\/CVPR52729.2023.00151"},{"key":"10_CR43","unstructured":"Wav2Lip UHQ. https:\/\/github.com\/numz\/sd-wav2lip-uhq. Accessed 26 Mar 2024"},{"key":"10_CR44","doi-asserted-by":"crossref","unstructured":"Aktas, M.S., et al.: Fault tolerant high performance information services for dynamic collections of grid and web services. Future Gener. Comput. Syst. 23(3), 317\u2013337 (2007). https:\/\/doi.org\/10.1016\/j.future.2006.05.009","DOI":"10.1016\/j.future.2006.05.009"},{"key":"10_CR45","doi-asserted-by":"crossref","unstructured":"Aydin, G., et al.: Building and applying geographical information system grids. Concurrency Comput. Pract. Exper. 20, 1653\u20131695 (2008). https:\/\/doi.org\/10.1002\/cpe.1312","DOI":"10.1002\/cpe.1312"},{"key":"10_CR46","doi-asserted-by":"crossref","unstructured":"Fox, G.C., et al.: Algorithms and the grid. Comput. Vis. Sci. 12, 115\u2013124 (2009). https:\/\/doi.org\/10.1007\/s00791-007-0083-8","DOI":"10.1007\/s00791-007-0083-8"},{"key":"10_CR47","doi-asserted-by":"crossref","unstructured":"Aktas, M., et al.: iSERVO: implementing the international solid earth research virtual observatory by integrating computational grid and geographical information web services. Pure Appl. Geophys. 163, 2281\u20132296 (2006). 
https:\/\/doi.org\/10.1007\/s00024-006-0137-8","DOI":"10.1007\/s00024-006-0137-8"},{"key":"10_CR48","doi-asserted-by":"publisher","unstructured":"Aydin, G., et al.: SERVOGrid complexity computational environments (CCE) integrated performance analysis. In: The 6th IEEE\/ACM International Workshop on Grid Computing, Seattle, WA, USA, 6 pp. (2005). https:\/\/doi.org\/10.1109\/GRID.2005.1542750","DOI":"10.1109\/GRID.2005.1542750"}],"container-title":["Lecture Notes in Computer Science","Computational Science and Its Applications \u2013 ICCSA 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-65282-0_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,24]],"date-time":"2024-07-24T20:33:39Z","timestamp":1721853219000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-65282-0_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031652813","9783031652820"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-65282-0_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"25 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICCSA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Science and Its Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iccsa2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}