{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:19:51Z","timestamp":1775067591459,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,10,12]],"date-time":"2020-10-12T00:00:00Z","timestamp":1602460800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["61836002U161146161751209"],"award-info":[{"award-number":["61836002U161146161751209"]}]},{"name":"National Key R&D Program of China","award":["2018AAA0100603"],"award-info":[{"award-number":["2018AAA0100603"]}]},{"name":"Zhejiang Natural Science Foundation","award":["LR19F020006"],"award-info":[{"award-number":["LR19F020006"]}]},{"name":"Fundamental Research Funds for the Central Universities","award":["2020QNA5024"],"award-info":[{"award-number":["2020QNA5024"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,10,12]]},"DOI":"10.1145\/3394171.3413740","type":"proceedings-article","created":{"date-parts":[[2020,10,12]],"date-time":"2020-10-12T13:10:44Z","timestamp":1602508244000},"page":"4328-4336","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["FastLR"],"prefix":"10.1145","author":[{"given":"Jinglin","family":"Liu","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Yi","family":"Ren","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Zhou","family":"Zhao","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Chen","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Baoxing","family":"Huai","sequence":"additional","affiliation":[{"name":"HUAWEI TECHNOLOGIES CO., LTD., Shenzhen, China"}]},{"given":"Jing","family":"Yuan","sequence":"additional","affiliation":[{"name":"Huawei Cloud BU, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2020,10,12]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Triantafyllos Afouras Joon Son Chung Andrew Senior Oriol Vinyals and Andrew Zisserman. 2018b. Deep audio-visual speech recognition. IEEE transactions on pattern analysis and machine intelligence (2018).  Triantafyllos Afouras Joon Son Chung Andrew Senior Oriol Vinyals and Andrew Zisserman. 2018b. Deep audio-visual speech recognition. IEEE transactions on pattern analysis and machine intelligence (2018)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"crossref","unstructured":"Triantafyllos Afouras Joon Son Chung and Andrew Zisserman. 2018a. Deep lip reading: a comparison of models and an online application. arXiv preprint arXiv:1806.06053 (2018).  Triantafyllos Afouras Joon Son Chung and Andrew Zisserman. 2018a. Deep lip reading: a comparison of models and an online application. arXiv preprint arXiv:1806.06053 (2018).","DOI":"10.21437\/Interspeech.2018-1943"},{"key":"e_1_3_2_2_3_1","volume-title":"Lipnet: End-to-end sentence-level lipreading. arXiv preprint arXiv:1611.01599","author":"Assael Yannis M","year":"2016"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Anthony N Burkitt. 2006. A review of the integrate-and-fire neuron model: I. Homogeneous synaptic input. Biological cybernetics Vol. 95 1 (2006) 1--19.  Anthony N Burkitt. 2006. A review of the integrate-and-fire neuron model: I. Homogeneous synaptic input. Biological cybernetics Vol. 95 1 (2006) 1--19.","DOI":"10.1007\/s00422-006-0068-6"},{"key":"e_1_3_2_2_5_1","unstructured":"Nanxin Chen Shinji Watanabe Jes\u00fas Villalba and Najim Dehak. 2019. Non-Autoregressive Transformer Automatic Speech Recognition. arXiv preprint arXiv:1911.04908 (2019).  Nanxin Chen Shinji Watanabe Jes\u00fas Villalba and Najim Dehak. 2019. Non-Autoregressive Transformer Automatic Speech Recognition. arXiv preprint arXiv:1911.04908 (2019)."},{"key":"e_1_3_2_2_6_1","unstructured":"Junyoung Chung Caglar Gulcehre KyungHyun Cho and Yoshua Bengio. 2014. Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014).  Junyoung Chung Caglar Gulcehre KyungHyun Cho and Yoshua Bengio. 2014. Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.367"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Joon Son Chung and AP Zisserman. 2017. Lip reading in profile. (2017).  Joon Son Chung and AP Zisserman. 2017. Lip reading in profile. (2017).","DOI":"10.5244\/C.31.155"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"key":"e_1_3_2_2_10_1","volume-title":"CIF: Continuous Integrate-and-Fire for End-to-End Speech Recognition. arXiv preprint arXiv:1905.11235","author":"Dong Linhao","year":"2019"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1633"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"e_1_3_2_2_13_1","unstructured":"Jiatao Gu James Bradbury Caiming Xiong Victor OK Li and Richard Socher. 2017. Non-autoregressive neural machine translation. arXiv preprint arXiv:1711.02281 (2017).  Jiatao Gu James Bradbury Caiming Xiong Victor OK Li and Richard Socher. 2017. Non-autoregressive neural machine translation. arXiv preprint arXiv:1711.02281 (2017)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013723"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"crossref","unstructured":"Jason Lee Elman Mansimov and Kyunghyun Cho. 2018. Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement. In EMNLP. 1173--1182.  Jason Lee Elman Mansimov and Kyunghyun Cho. 2018. Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement. In EMNLP. 1173--1182.","DOI":"10.18653\/v1\/D18-1149"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/534"},{"key":"e_1_3_2_2_17_1","unstructured":"Xuezhe Ma Chunting Zhou Xian Li Graham Neubig and Eduard Hovy. 2019. FlowSeq: Non-Autoregressive Conditional Sequence Generation with Generative Flow. In EMNLP-IJCNLP. 4273--4283.  Xuezhe Ma Chunting Zhou Xian Li Graham Neubig and Eduard Hovy. 2019. FlowSeq: Non-Autoregressive Conditional Sequence Generation with Generative Flow. In EMNLP-IJCNLP. 4273--4283."},{"key":"e_1_3_2_2_18_1","unstructured":"Aaron van den Oord Yazhe Li Igor Babuschkin Karen Simonyan Oriol Vinyals Koray Kavukcuoglu George van den Driessche Edward Lockhart Luis C Cobo Florian Stimberg et almbox. 2017. Parallel wavenet: Fast high-fidelity speech synthesis. arXiv preprint arXiv:1711.10433 (2017).  Aaron van den Oord Yazhe Li Igor Babuschkin Karen Simonyan Oriol Vinyals Koray Kavukcuoglu George van den Driessche Edward Lockhart Luis C Cobo Florian Stimberg et almbox. 2017. Parallel wavenet: Fast high-fidelity speech synthesis. arXiv preprint arXiv:1711.10433 (2017)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639643"},{"key":"e_1_3_2_2_20_1","unstructured":"Yi Ren Chenxu Hu Tao Qin Sheng Zhao Zhou Zhao and Tie-Yan Liu. 2020 a. FastSpeech 2: Fast and High-Quality End-to-End Text-to-Speech. arXiv preprint arXiv:2006.04558 (2020).  Yi Ren Chenxu Hu Tao Qin Sheng Zhao Zhou Zhao and Tie-Yan Liu. 2020 a. FastSpeech 2: Fast and High-Quality End-to-End Text-to-Speech. arXiv preprint arXiv:2006.04558 (2020)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"Yi Ren Jinglin Liu Xu Tan Sheng Zhao Zhou Zhao and Tie-Yan Liu. 2020 b. A Study of Non-autoregressive Model for Sequence Generation. arXiv preprint arXiv:2004.10454 (2020).  Yi Ren Jinglin Liu Xu Tan Sheng Zhao Zhou Zhao and Tie-Yan Liu. 2020 b. A Study of Non-autoregressive Model for Sequence Generation. arXiv preprint arXiv:2004.10454 (2020).","DOI":"10.18653\/v1\/2020.acl-main.15"},{"key":"e_1_3_2_2_22_1","volume-title":"Fastspeech: Fast, robust and controllable text to speech. In Advances in Neural Information Processing Systems. 3165--3174.","author":"Ren Yi","year":"2019"},{"key":"e_1_3_2_2_23_1","unstructured":"Rico Sennrich Barry Haddow and Alexandra Birch. 2015. Neural machine translation of rare words with subword units. arXiv preprint arXiv:1508.07909 (2015).  Rico Sennrich Barry Haddow and Alexandra Birch. 2015. Neural machine translation of rare words with subword units. arXiv preprint arXiv:1508.07909 (2015)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"crossref","unstructured":"Brendan Shillingford Yannis Assael Matthew W Hoffman Thomas Paine C\u00edian Hughes Utsav Prabhu Hank Liao Hasim Sak Kanishka Rao Lorrayne Bennett et almbox. 2018. Large-scale visual speech recognition. arXiv preprint arXiv:1807.05162 (2018).  Brendan Shillingford Yannis Assael Matthew W Hoffman Thomas Paine C\u00edian Hughes Utsav Prabhu Hank Liao Hasim Sak Kanishka Rao Lorrayne Bennett et almbox. 2018. Large-scale visual speech recognition. arXiv preprint arXiv:1807.05162 (2018).","DOI":"10.21437\/Interspeech.2019-1669"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"crossref","unstructured":"Themos Stafylakis and Georgios Tzimiropoulos. 2017. Combining residual networks with LSTMs for lipreading. arXiv preprint arXiv:1703.04105 (2017).  Themos Stafylakis and Georgios Tzimiropoulos. 2017. Combining residual networks with LSTMs for lipreading. arXiv preprint arXiv:1703.04105 (2017).","DOI":"10.21437\/Interspeech.2017-85"},{"key":"e_1_3_2_2_26_1","unstructured":"Ilya Sutskever Oriol Vinyals and Quoc V Le. 2014. Sequence to sequence learning with neural networks. In Advances in neural information processing systems. 3104--3112.  Ilya Sutskever Oriol Vinyals and Quoc V Le. 2014. Sequence to sequence learning with neural networks. In Advances in neural information processing systems. 3104--3112."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2017.2761539"},{"key":"e_1_3_2_2_28_1","unstructured":"Ashish Vaswani Samy Bengio Eugene Brevdo Francois Chollet Aidan N. Gomez Stephan Gouws Llion Jones \u0141ukasz Kaiser Nal Kalchbrenner Niki Parmar Ryan Sepassi Noam Shazeer and Jakob Uszkoreit. 2018. Tensor2Tensor for Neural Machine Translation. CoRR Vol. abs\/1803.07416 (2018). http:\/\/arxiv.org\/abs\/1803.07416  Ashish Vaswani Samy Bengio Eugene Brevdo Francois Chollet Aidan N. Gomez Stephan Gouws Llion Jones \u0141ukasz Kaiser Nal Kalchbrenner Niki Parmar Ryan Sepassi Noam Shazeer and Jakob Uszkoreit. 2018. Tensor2Tensor for Neural Machine Translation. CoRR Vol. abs\/1803.07416 (2018). http:\/\/arxiv.org\/abs\/1803.07416"},{"key":"e_1_3_2_2_29_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. In Advances in neural information processing systems. 5998--6008.  Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. In Advances in neural information processing systems. 5998--6008."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472852"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"crossref","unstructured":"Yiren Wang Fei Tian Di He Tao Qin ChengXiang Zhai and Tie-Yan Liu. 2019. Non-Autoregressive Machine Translation with Auxiliary Regularization. In AAAI.  Yiren Wang Fei Tian Di He Tao Qin ChengXiang Zhai and Tie-Yan Liu. 2019. Non-Autoregressive Machine Translation with Auxiliary Regularization. In AAAI.","DOI":"10.1609\/aaai.v33i01.33015377"},{"key":"e_1_3_2_2_32_1","unstructured":"Bang Yang Fenglin Liu and Yuexian Zou. 2019. Non-Autoregressive Video Captioning with Iterative Refinement. arXiv preprint arXiv:1911.12018 (2019).  Bang Yang Fenglin Liu and Yuexian Zou. 2019. Non-Autoregressive Video Captioning with Iterative Refinement. arXiv preprint arXiv:1911.12018 (2019)."},{"key":"e_1_3_2_2_33_1","volume-title":"Hearing Lips: Improving Lip Reading by Distilling Speech Recognizers. arXiv preprint arXiv:1911.11502","author":"Zhao Ya","year":"2019"}],"event":{"name":"MM '20: The 28th ACM International Conference on Multimedia","location":"Seattle WA USA","acronym":"MM '20","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 28th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394171.3413740","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3394171.3413740","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:01:16Z","timestamp":1750197676000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394171.3413740"}},"subtitle":["Non-Autoregressive Lipreading Model with Integrate-and-Fire"],"short-title":[],"issued":{"date-parts":[[2020,10,12]]},"references-count":33,"alternative-id":["10.1145\/3394171.3413740","10.1145\/3394171"],"URL":"https:\/\/doi.org\/10.1145\/3394171.3413740","relation":{},"subject":[],"published":{"date-parts":[[2020,10,12]]},"assertion":[{"value":"2020-10-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}