{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:19:50Z","timestamp":1775067590987,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":17,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,12,15]],"date-time":"2019-12-15T00:00:00Z","timestamp":1576368000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,12,15]]},"DOI":"10.1145\/3338533.3366579","type":"proceedings-article","created":{"date-parts":[[2020,1,11]],"date-time":"2020-01-11T04:30:32Z","timestamp":1578717032000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":60,"title":["A Cascade Sequence-to-Sequence Model for Chinese Mandarin Lip Reading"],"prefix":"10.1145","author":[{"given":"Ya","family":"Zhao","sequence":"first","affiliation":[{"name":"Zhejiang Provincial Key Laboratory of Service Robots Zhejiang University"}]},{"given":"Rui","family":"Xu","sequence":"additional","affiliation":[{"name":"Zhejiang Provincial Key Laboratory of Service Robots Zhejiang University"}]},{"given":"Mingli","family":"Song","sequence":"additional","affiliation":[{"name":"Zhejiang Provincial Key Laboratory of Service Robots Zhejiang University"}]}],"member":"320","published-online":{"date-parts":[[2020,1,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Lipnet: Sentence-level lipreading. arXiv preprint","author":"Assael Yannis M","year":"2016"},{"key":"e_1_3_2_1_2_1","unstructured":"Dzmitry Bahdanau Kyunghyun Cho and Yoshua Bengio. 2015. Neural Machine Translation by Jointly Learning to Align and Translate. international conference on learning representations (2015).  Dzmitry Bahdanau Kyunghyun Cho and Yoshua Bengio. 2015. Neural Machine Translation by Jointly Learning to Align and Translate. international conference on learning representations (2015)."},{"key":"e_1_3_2_1_3_1","unstructured":"Samy Bengio Oriol Vinyals Navdeep Jaitly and Noam M. Shazeer. 2015. Scheduled sampling for sequence prediction with recurrent Neural networks. neural information processing systems (2015) 1171--1179.  Samy Bengio Oriol Vinyals Navdeep Jaitly and Noam M. Shazeer. 2015. Scheduled sampling for sequence prediction with recurrent Neural networks. neural information processing systems (2015) 1171--1179."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.28.6"},{"key":"e_1_3_2_1_5_1","unstructured":"C. Julian Chen Ramesh A. Gopinath Michael D. Monkowski Michael A. Picheny and Katherine Shen. 1997. New methods in continuous Mandarin speech recognition.. In EUROSPEECH.  C. Julian Chen Ramesh A. Gopinath Michael D. Monkowski Michael A. Picheny and Katherine Shen. 1997. New methods in continuous Mandarin speech recognition.. In EUROSPEECH."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.2839004"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Joon Son Chung Andrew W Senior Oriol Vinyals and Andrew Zisserman. 2017. Lip Reading Sentences in the Wild. In CVPR. 3444--3453.  Joon Son Chung Andrew W Senior Oriol Vinyals and Andrew Zisserman. 2017. Lip Reading Sentences in the Wild. In CVPR. 3444--3453.","DOI":"10.1109\/CVPR.2017.367"},{"key":"e_1_3_2_1_9_1","volume-title":"Asian Conference on Computer Vision. Springer, 87--103","author":"Chung Joon Son","year":"2016"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"e_1_3_2_1_11_1","first-page":"1755","article-title":"Dlib-ml: A machine learning toolkit","author":"King Davis E","year":"2009","journal-title":"Journal of Machine Learning Research 10"},{"key":"e_1_3_2_1_12_1","volume-title":"Network In Network. international conference on learning representations","author":"Lin Min","year":"2014"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472088"},{"key":"e_1_3_2_1_14_1","unstructured":"Shuang Yang Yuanhang Zhang Dalu Feng Mingmin Yang Chenhao Wang Jingyun Xiao Keyu Long Shiguang Shan and Xilin Chen. 2018. LRW-1000: A Naturally-Distributed Large-Scale Benchmark for Lip Reading in the Wild. arXiv preprint arXiv:1810.06990 (2018).  Shuang Yang Yuanhang Zhang Dalu Feng Mingmin Yang Chenhao Wang Jingyun Xiao Keyu Long Shiguang Shan and Xilin Chen. 2018. LRW-1000: A Naturally-Distributed Large-Scale Benchmark for Lip Reading in the Wild. arXiv preprint arXiv:1810.06990 (2018)."},{"key":"e_1_3_2_1_15_1","volume-title":"Understanding Pictograph with Facial Features: End-to-End Sentence-level Lip Reading of Chinese. In AAAI 2019: Thirty-Third AAAI Conference on Artificial Intelligence.","author":"Zhang Xiaobing","year":"2019"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Shiyu Zhou Linhao Dong Shuang Xu and Bo Xu. 2018. A Comparison of Modeling Units in Sequence-to-Sequence Speech Recognition with the Transformer on Mandarin Chinese. international conference on neural information processing 2018 (2018) 210--220.  Shiyu Zhou Linhao Dong Shuang Xu and Bo Xu. 2018. A Comparison of Modeling Units in Sequence-to-Sequence Speech Recognition with the Transformer on Mandarin Chinese. international conference on neural information processing 2018 (2018) 210--220.","DOI":"10.1007\/978-3-030-04221-9_19"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1107"}],"event":{"name":"MMAsia '19: ACM Multimedia Asia","location":"Beijing China","acronym":"MMAsia '19","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the ACM Multimedia Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3338533.3366579","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3338533.3366579","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:44:46Z","timestamp":1750203886000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3338533.3366579"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12,15]]},"references-count":17,"alternative-id":["10.1145\/3338533.3366579","10.1145\/3338533"],"URL":"https:\/\/doi.org\/10.1145\/3338533.3366579","relation":{},"subject":[],"published":{"date-parts":[[2019,12,15]]},"assertion":[{"value":"2020-01-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}