{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:14:57Z","timestamp":1777655697046,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61836002,62072397"],"award-info":[{"award-number":["61836002,62072397"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012165","name":"Key Technologies Research and Development Program","doi-asserted-by":"publisher","award":["2018AAA0100603"],"award-info":[{"award-number":["2018AAA0100603"]}],"id":[{"id":"10.13039\/501100012165","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Zhejiang Natural Science Foundation","award":["LR19F020006"],"award-info":[{"award-number":["LR19F020006"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3475544","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T04:52:26Z","timestamp":1634532746000},"page":"4118-4127","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":34,"title":["SimulSLT"],"prefix":"10.1145","author":[{"given":"Aoxiong","family":"Yin","sequence":"first","affiliation":[{"name":"Zhejiang University, Hang Zhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhou","family":"Zhao","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hang Zhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinglin","family":"Liu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hang Zhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weike","family":"Jin","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hang Zhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Huawei Noah's Ark Lab, Hong Kong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xingshan","family":"Zeng","sequence":"additional","affiliation":[{"name":"Huawei Noah's Ark Lab, Hong Kong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaofei","family":"He","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hang Zhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1126"},{"key":"e_1_3_2_1_2_1","volume-title":"3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings,, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1409","author":"Bahdanau Dzmitry","year":"2015"},{"key":"e_1_3_2_1_3_1","volume-title":"Enriching Word Vectors with Subword Information. arXiv preprint arXiv:1607.04606","author":"Bojanowski Piotr","year":"2016"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00422-006-0068-6"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00812"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-66823-5_18"},{"key":"e_1_3_2_1_7_1","volume-title":"Sign Language Transformers: Joint End-to-End Sign Language Recognition and Translation. In 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020","author":"Camg\u00f6z Necati Cihan","year":"2020"},{"key":"e_1_3_2_1_8_1","volume-title":"Can neural machine translation do simultaneous translation? CoRR","author":"Cho Kyunghyun","year":"2012"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/1289189.1289273"},{"key":"e_1_3_2_1_11_1","volume-title":"CIF: Continuous Integrate-And-Fire for End-To-End Speech Recognition. In 2020 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2020","author":"Dong Linhao","year":"2020"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics, AISTATS 2010, Chia Laguna Resort, Sardinia, Italy, May 13-15, 2010 (JMLR Proceedings","volume":"256","author":"Glorot Xavier","year":"2010"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence, (AAAI-18)","author":"Guo Dan","year":"2018"},{"key":"e_1_3_2_1_15_1","volume-title":"Distilling the Knowledge in a Neural Network. CoRR","author":"Hinton Geoffrey E.","year":"2015"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Jie Huang Wengang Zhou Houqiang Li and Weiping Li. 2015a. Sign Language Recognition using 3D convolutional neural networks. 6 pages. https:\/\/doi.org\/10.1109\/ICME.2015.7177428  Jie Huang Wengang Zhou Houqiang Li and Weiping Li. 2015a. Sign Language Recognition using 3D convolutional neural networks. 6 pages. https:\/\/doi.org\/10.1109\/ICME.2015.7177428","DOI":"10.1109\/ICME.2015.7177428"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Jie Huang Wengang Zhou Houqiang Li and Weiping Li. 2015b. Sign Language Recognition using 3D convolutional neural networks. 6 pages. https:\/\/doi.org\/10.1109\/ICME.2015.7177428  Jie Huang Wengang Zhou Houqiang Li and Weiping Li. 2015b. Sign Language Recognition using 3D convolutional neural networks. 6 pages. https:\/\/doi.org\/10.1109\/ICME.2015.7177428","DOI":"10.1109\/ICME.2015.7177428"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2870740"},{"key":"e_1_3_2_1_19_1","first-page":"17137","volume-title":"Video-Based Sign Language Recognition Without Temporal Segmentation. BT - Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence, (AAAI-18)","author":"Huang Jie","year":"2018"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475463"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475456"},{"key":"e_1_3_2_1_22_1","volume-title":"MS-ASL: A Large-Scale Data Set and Benchmark for Understanding American Sign Language. In 30th British Machine Vision Conference 2019, BMVC 2019","author":"Vaezi Joze Hamid Reza","year":"2019"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics","author":"Kim Yoon","year":"1865"},{"key":"e_1_3_2_1_24_1","first-page":"1","article-title":"Neural sign language translation based on human keypoint estimation","volume":"9","author":"Ko Sang Ki","year":"2019","journal-title":"Applied Sciences (Switzerland)"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2015.09.013"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.412"},{"key":"e_1_3_2_1_27_1","volume-title":"Re-Sign: Re-Aligned End-to-End Sequence Modelling with Deep Recurrent CNN-HMMs. In 2017 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017","author":"Koller Oscar","year":"2017"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-018-1121-3"},{"key":"e_1_3_2_1_29_1","unstructured":"Dongxu Li Cristian Rodriguez Opazo Xin Yu and Hongdong Li. 2019 a. Word-level deep sign language recognition from video: A new large-scale dataset and methods comparison. In arXiv. 1459--1469.  Dongxu Li Cristian Rodriguez Opazo Xin Yu and Hongdong Li. 2019 a. Word-level deep sign language recognition from video: A new large-scale dataset and methods comparison. In arXiv. 1459--1469."},{"key":"e_1_3_2_1_30_1","unstructured":"Dongxu Li Cristian Rodriguez Opazo Xin Yu and Hongdong Li. 2019 b. Word-level deep sign language recognition from video: A new large-scale dataset and methods comparison. In arXiv. 1459--1469.  Dongxu Li Cristian Rodriguez Opazo Xin Yu and Hongdong Li. 2019 b. Word-level deep sign language recognition from video: A new large-scale dataset and methods comparison. In arXiv. 1459--1469."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093512"},{"key":"e_1_3_2_1_32_1","volume-title":"Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020","author":"Li Dongxu","year":"2020"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413740"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"e_1_3_2_1_35_1","series-title":"STACL: Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework. In Proceedings of the 57th Conference of the Association for Computational Linguistics, ACL","volume-title":"Long Papers,, Anna Korhonen, David R. Traum, and Llu\u00eds M\u00e0 rquez (Eds.)","author":"Ma Mingbo","year":"2019"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the 1st Conference of the Asia-Pacific","author":"Ma Xutai","year":"2020"},{"key":"e_1_3_2_1_37_1","volume-title":"Monotonic Multihead Attention. In 8th International Conference on Learning Representations, ICLR 2020","author":"Ma Xutai","year":"2020"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(97)00011-7"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/1565714.1565744"},{"key":"e_1_3_2_1_40_1","volume-title":"Neural sign language translation by learning tokenization. arXiv","author":"Orbay Alptekin","year":"2020"},{"key":"e_1_3_2_1_41_1","volume-title":"Deafness and hearing loss. https:\/\/www.who.int\/news-room\/fact-sheets\/detail\/deafness-and-hearing-loss Retrieved","author":"World Health Organization","year":"2021"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.350"},{"key":"e_1_3_2_1_44_1","volume-title":"Sign Language Translation System Using Continuous DP Matching. Journal of Machine Vision and Applications","author":"Sagawa Hirohiko","year":"1992"},{"key":"e_1_3_2_1_45_1","volume-title":"Parallel Temporal Encoder For Sign Language Translation. In 2019 IEEE International Conference on Image Processing, ICIP 2019","author":"Song Peipei","year":"2019"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.5555\/525981.849918"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1016\/0031-3203(88)90048-9"},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning, ICML 2019","volume":"6114","author":"Tan Mingxing","year":"2019"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_50_1","volume-title":"Amsterdam, The Netherlands","author":"Yin Fang","year":"2016"},{"key":"e_1_3_2_1_51_1","volume-title":"Better Sign Language Translation with STMC-Transformer. (2020). arxiv","author":"Yin Kayo","year":"2004"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/2632856.2632931"}],"event":{"name":"MM '21: ACM Multimedia Conference","location":"Virtual Event China","acronym":"MM '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475544","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3475544","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:49:10Z","timestamp":1750193350000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475544"}},"subtitle":["End-to-End Simultaneous Sign Language Translation"],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":52,"alternative-id":["10.1145\/3474085.3475544","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3475544","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}