{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T22:33:32Z","timestamp":1778193212698,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61836002,62072397,62077041"],"award-info":[{"award-number":["61836002,62072397,62077041"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020YFC0832505"],"award-info":[{"award-number":["2020YFC0832505"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Zhejiang Provincial Natural Science Foundation","award":["LR19F020006"],"award-info":[{"award-number":["LR19F020006"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3475463","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T20:00:05Z","timestamp":1634587205000},"page":"3172-3181","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":35,"title":["Towards Fast and High-Quality Sign Language Production"],"prefix":"10.1145","author":[{"given":"Wencan","family":"Huang","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenwen","family":"Pan","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhou","family":"Zhao","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Tian","sequence":"additional","affiliation":[{"name":"Huawei Cloud &amp; AI, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1409","author":"Bahdanau Dzmitry","year":"2015"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"crossref","unstructured":"Britta Bauer Hermann Hienz and Karl-Friedrich Kraiss. 2000. Video-Based Continuous Sign Language Recognition Using Statistical Methods. ICPR 2463--2466. https:\/\/doi.org\/10.1109\/ICPR.2000.906112  Britta Bauer Hermann Hienz and Karl-Friedrich Kraiss. 2000. Video-Based Continuous Sign Language Recognition Using Statistical Methods. ICPR 2463--2466. https:\/\/doi.org\/10.1109\/ICPR.2000.906112","DOI":"10.1109\/ICPR.2000.906112"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/2969239.2969370"},{"key":"e_1_3_2_2_5_1","unstructured":"Christopher M Bishop. 1994. Mixture density networks. (1994).  Christopher M Bishop. 1994. Mixture density networks. (1994)."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Necati Cihan Camg\u00f6 z Simon Hadfield Oscar Koller and Richard Bowden. 2017. SubUNets: End-to-End Hand Shape and Continuous Sign Language Recognition. ICCV 3075--3084. https:\/\/doi.org\/10.1109\/ICCV.2017.332  Necati Cihan Camg\u00f6 z Simon Hadfield Oscar Koller and Richard Bowden. 2017. SubUNets: End-to-End Hand Shape and Continuous Sign Language Recognition. ICCV 3075--3084. https:\/\/doi.org\/10.1109\/ICCV.2017.332","DOI":"10.1109\/ICCV.2017.332"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Necati Cihan Camg\u00f6 z Simon Hadfield Oscar Koller Hermann Ney and Richard Bowden. 2018. Neural Sign Language Translation. CVPR 7784--7793. https:\/\/doi.org\/10.1109\/CVPR.2018.00812  Necati Cihan Camg\u00f6 z Simon Hadfield Oscar Koller Hermann Ney and Richard Bowden. 2018. Neural Sign Language Translation. CVPR 7784--7793. https:\/\/doi.org\/10.1109\/CVPR.2018.00812","DOI":"10.1109\/CVPR.2018.00812"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Necati Cihan Camg\u00f6 z Oscar Koller Simon Hadfield and Richard Bowden. 2020. Sign Language Transformers: Joint End-to-End Sign Language Recognition and Translation. CVPR 10020--10030. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01004  Necati Cihan Camg\u00f6 z Oscar Koller Simon Hadfield and Richard Bowden. 2020. Sign Language Transformers: Joint End-to-End Sign Language Recognition and Translation. CVPR 10020--10030. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01004","DOI":"10.1109\/CVPR42600.2020.01004"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2929257"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.388"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"e_1_3_2_2_12_1","volume-title":"KyungHyun Cho, and Yoshua Bengio.","author":"Chung Junyoung","year":"2014"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3352587"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2021068"},{"key":"e_1_3_2_2_15_1","volume-title":"Proceedings of the Eleventh International Conference on Language Resources and Evaluation, LREC 2018","author":"Ebling Sarah","year":"2018"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/2919332.2919834"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.5555\/3305381.3305510"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"crossref","unstructured":"Shiry Ginosar Amir Bar Gefen Kohavi Caroline Chan Andrew Owens and Jitendra Malik. 2019. Learning Individual Styles of Conversational Gesture. CVPR 3497--3506. https:\/\/doi.org\/10.1109\/CVPR.2019.00361  Shiry Ginosar Amir Bar Gefen Kohavi Caroline Chan Andrew Owens and Jitendra Malik. 2019. Learning Individual Styles of Conversational Gesture. CVPR 3497--3506. https:\/\/doi.org\/10.1109\/CVPR.2019.00361","DOI":"10.1109\/CVPR.2019.00361"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/3157382.3157612"},{"key":"e_1_3_2_2_20_1","volume-title":"Sequence Transduction with Recurrent Neural Networks. CoRR","author":"Graves Alex","year":"2012"},{"key":"e_1_3_2_2_21_1","volume-title":"Non-Autoregressive Neural Machine Translation. CoRR","author":"Gu Jiatao","year":"2017"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compedu.2005.06.004"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/INISTA.2019.8778347"},{"key":"e_1_3_2_2_25_1","unstructured":"Jaehyeon Kim Sungwon Kim Jungil Kong and Sungroh Yoon. 2020. Glow-TTS: A Generative Flow for Text-to-Speech via Monotonic Alignment Search. NIPS. https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/5c3b99e8f92532e5ad1556e53ceea00c-Abstract.html  Jaehyeon Kim Sungwon Kim Jungil Kong and Sungroh Yoon. 2020. Glow-TTS: A Generative Flow for Text-to-Speech via Monotonic Alignment Search. NIPS. https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/5c3b99e8f92532e5ad1556e53ceea00c-Abstract.html"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2015.09.013"},{"key":"e_1_3_2_2_27_1","volume-title":"Ioannis Hatzilygeroudis","author":"Kouremenos Dimitris"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3038362"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/534"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413740"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10209-015-0407-2"},{"key":"e_1_3_2_2_32_1","volume-title":"24th Signal Processing and Communication Application Conference, SIU 2016","author":"Ogulcan","year":"2016"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-3049"},{"key":"e_1_3_2_2_34_1","volume-title":"4th International Conference on Learning Representations, ICLR","author":"Radford Alec","year":"2016"},{"key":"e_1_3_2_2_35_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021","author":"Ren Yi","year":"2021"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.15"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3454572"},{"key":"e_1_3_2_2_38_1","volume-title":"Adversarial Training for Multi-Channel Sign Language Production. In 31st British Machine Vision Conference 2020, BMVC 2020","author":"Saunders Ben","year":"2020"},{"key":"e_1_3_2_2_39_1","volume-title":"Necati Cihan Camg\u00f6 z, and Richard Bowden. 2020 b. Everybody Sign Now: Translating Spoken Language to Photo Realistic Sign Language Video. CoRR","author":"Saunders Ben","year":"2020"},{"key":"e_1_3_2_2_40_1","volume-title":"Progressive Transformers for End-to-End Sign Language Production (Lecture Notes in Computer Science","volume":"705","author":"Saunders Ben","year":"2020"},{"key":"e_1_3_2_2_41_1","volume-title":"Celia Shahnaz, and Shaikh Anowarul Fattah.","author":"Shahriar Shadman","year":"2018"},{"key":"e_1_3_2_2_42_1","volume-title":"Simon Hadfield, and Richard Bowden.","author":"Stoll Stephanie","year":"2018"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01281-2"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969173"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/0031-3203(88)90048-9"},{"key":"e_1_3_2_2_46_1","volume-title":"21st Annual Conference of the International Speech Communication Association, Virtual Event","author":"Tian Zhengkun","year":"2020"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2020.01.030"},{"key":"e_1_3_2_2_49_1","volume-title":"Convolutional Sequence Generation for Skeleton-Based Action Synthesis. In 2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019","author":"Yan Sijie","year":"2019"},{"key":"e_1_3_2_2_50_1","volume-title":"Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition","author":"Yan Sijie"},{"key":"e_1_3_2_2_51_1","volume-title":"Proceedings, Part V (Lecture Notes in Computer Science","volume":"293","author":"Yan Xinchen","year":"2018"},{"key":"e_1_3_2_2_52_1","volume-title":"Non-Autoregressive Video Captioning with Iterative Refinement. CoRR","author":"Yang Bang","year":"2018"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2013.03.001"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"crossref","unstructured":"Jan Zelinka and Jakub Kanis. 2020. Neural Sign Language Synthesis: Words Are Our Glosses. WACV 3384--3392. https:\/\/doi.org\/10.1109\/WACV45572.2020.9093516  Jan Zelinka and Jakub Kanis. 2020. Neural Sign Language Synthesis: Words Are Our Glosses. WACV 3384--3392. https:\/\/doi.org\/10.1109\/WACV45572.2020.9093516","DOI":"10.1109\/WACV45572.2020.9093516"},{"key":"e_1_3_2_2_55_1","volume-title":"Aligntts: Efficient Feed-Forward Text-to-Speech System Without Explicit Alignment. In 2020 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2020","author":"Zeng Zhen","year":"2020"}],"event":{"name":"MM '21: ACM Multimedia Conference","location":"Virtual Event China","acronym":"MM '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475463","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3475463","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:33Z","timestamp":1750193313000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475463"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":54,"alternative-id":["10.1145\/3474085.3475463","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3475463","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}