{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T03:48:15Z","timestamp":1742960895558,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":20,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819985364"},{"type":"electronic","value":"9789819985371"}],"license":[{"start":{"date-parts":[[2023,12,26]],"date-time":"2023-12-26T00:00:00Z","timestamp":1703548800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,26]],"date-time":"2023-12-26T00:00:00Z","timestamp":1703548800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8537-1_22","type":"book-chapter","created":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T19:02:17Z","timestamp":1703530937000},"page":"269-281","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Utilizing Video Word Boundaries and\u00a0Feature-Based Knowledge Distillation Improving Sentence-Level Lip Reading"],"prefix":"10.1007","author":[{"given":"Hongzhong","family":"Zhen","sequence":"first","affiliation":[]},{"given":"Chenglong","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Jiyong","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Liming","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Ying","family":"Gao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,26]]},"reference":[{"issue":"12","key":"22_CR1","doi-asserted-by":"publisher","first-page":"8717","DOI":"10.1109\/TPAMI.2018.2889052","volume":"44","author":"T Afouras","year":"2018","unstructured":"Afouras, T., Chung, J.S., Senior, A., et al.: Deep audio-visual speech recognition. IEEE Trans. Pattern Anal. Mach. Intell. 44(12), 8717\u20138727 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"22_CR2","unstructured":"Amodei, D., Ananthanarayanan, S., Anubhai, R., et al.: Deep speech 2: end-to-end speech recognition in English and Mandarin. In: International Conference on Machine Learning, pp. 173\u2013182. PMLR (2016)"},{"key":"22_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/978-3-319-54184-6_6","volume-title":"Computer Vision \u2013 ACCV 2016","author":"JS Chung","year":"2017","unstructured":"Chung, J.S., Zisserman, A.: Lip reading in the wild. In: Lai, S.-H., Lepetit, V., Nishino, K., Sato, Y. (eds.) ACCV 2016. LNCS, vol. 10112, pp. 87\u2013103. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54184-6_6"},{"key":"22_CR4","doi-asserted-by":"crossref","unstructured":"Feng, D., Yang, S., Shan, S.: An efficient software for building lip reading models without pains. In: 2021 IEEE International Conference on Multimedia & Expo Workshops (ICMEW), pp. 1\u20132. IEEE (2021)","DOI":"10.1109\/ICMEW53276.2021.9456014"},{"key":"22_CR5","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., et al.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376 (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"22_CR6","unstructured":"Hannun, A.Y., Maas, A.L., Jurafsky, D., et al.: First-pass large vocabulary continuous speech recognition using bi-directional recurrent DNNs. arXiv preprint arXiv:1408.2873 (2014)"},{"key":"22_CR7","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"22_CR8","doi-asserted-by":"crossref","unstructured":"Ma, P., Martinez, B., Petridis, S., et al.: Towards practical lipreading with distilled and efficient models. In: ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7608\u20137612. IEEE (2021)","DOI":"10.1109\/ICASSP39728.2021.9415063"},{"key":"22_CR9","doi-asserted-by":"crossref","unstructured":"Ma, P., Wang, Y., Petridis, S., et al.: Training strategies for improved lip-reading. In: ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8472\u20138476. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9746706"},{"key":"22_CR10","doi-asserted-by":"crossref","unstructured":"Ma, S., Wang, S., Lin, X.: A transformer-based model for sentence-level Chinese mandarin lipreading. In: 2020 IEEE Fifth International Conference on Data Science in Cyberspace (DSC), pp. 78\u201381. IEEE (2020)","DOI":"10.1109\/DSC50466.2020.00020"},{"key":"22_CR11","doi-asserted-by":"crossref","unstructured":"Qu, L., Weber, C., Wermter, S.: LipSound2: self-supervised pre-training for lip-to-speech reconstruction and lip reading. IEEE Trans. Neural Netw. Learn. Syst. (2022)","DOI":"10.1109\/TNNLS.2022.3191677"},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Ren, S., Du, Y., Lv, J., et al.: Learning from the master: distilling cross-modal advanced knowledge for lip reading. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13325\u201313333 (2021)","DOI":"10.1109\/CVPR46437.2021.01312"},{"key":"22_CR13","unstructured":"Romero, A., Ballas, N., Kahou, S.E., et al.: FitNets: hints for thin deep nets. In: International Conference on Learning Representations (2015)"},{"key":"22_CR14","doi-asserted-by":"crossref","unstructured":"Stafylakis, T., Khan, M.H., Tzimiropoulos, G.: Pushing the boundaries of audiovisual word recognition using residual networks and LSTMs. Comput. Vis. Image Underst. 176, 22\u201332 (2018)","DOI":"10.1016\/j.cviu.2018.10.003"},{"key":"22_CR15","doi-asserted-by":"crossref","unstructured":"Stafylakis, T., Tzimiropoulos, G.: Combining residual networks with LSTMs for lipreading. In: Interspeech (2017)","DOI":"10.21437\/Interspeech.2017-85"},{"key":"22_CR16","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"22_CR17","doi-asserted-by":"crossref","unstructured":"Yang, S., Zhang, Y., Feng, D., et al.: LRW-1000: a naturally-distributed large-scale benchmark for lip reading in the wild. In: 2019 14th IEEE International Conference on Automatic Face & Gesture Recognition (FG 2019), pp. 1\u20138. IEEE (2019)","DOI":"10.1109\/FG.2019.8756582"},{"key":"22_CR18","doi-asserted-by":"crossref","unstructured":"Yu, L., Yazici, V.O., Liu, X., et al.: Learning metrics from teachers: compact networks for image embedding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2907\u20132916 (2019)","DOI":"10.1109\/CVPR.2019.00302"},{"key":"22_CR19","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xu, R., Song, M.: A cascade sequence-to-sequence model for Chinese mandarin lip reading. In: Proceedings of the ACM Multimedia Asia, pp. 1\u20136 (2019)","DOI":"10.1145\/3338533.3366579"},{"key":"22_CR20","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xu, R., Wang, X., et al.: Hearing lips: improving lip reading by distilling speech recognizers. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 6917\u20136924 (2020)","DOI":"10.1609\/aaai.v34i04.6174"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8537-1_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T19:10:09Z","timestamp":1703531409000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8537-1_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,26]]},"ISBN":["9789819985364","9789819985371"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8537-1_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,12,26]]},"assertion":[{"value":"26 December 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xiamen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/prcv2023.xmu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1420","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"532","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,78","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,69","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}