{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T16:54:59Z","timestamp":1781974499527,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFB3303900"],"award-info":[{"award-number":["2022YFB3303900"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013058","name":"Jiangsu Provincial Key Research and Development Program","doi-asserted-by":"publisher","award":["BE2020001-4"],"award-info":[{"award-number":["BE2020001-4"]}],"id":[{"id":"10.13039\/501100013058","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172208, 62272216, 61832008, 61872174"],"award-info":[{"award-number":["62172208, 62272216, 61832008, 61872174"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Collaborative Innovation Center of Novel Software Technology andIndustrialization"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611820","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"4502-4512","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["Towards Real-Time Sign Language Recognition and Translation on Edge Devices"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3360-4321","authenticated-orcid":false,"given":"Shiwei","family":"Gan","sequence":"first","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9497-6244","authenticated-orcid":false,"given":"Yafeng","family":"Yin","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5243-4992","authenticated-orcid":false,"given":"Zhiwei","family":"Jiang","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2994-6743","authenticated-orcid":false,"given":"Lei","family":"Xie","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1467-4519","authenticated-orcid":false,"given":"Sanglu","family":"Lu","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"American sign language recognition using deep learning and computer vision. In 2018 Big Data","author":"Bantupalli Kshitij","unstructured":"Kshitij Bantupalli and Ying Xie. 2018. American sign language recognition using deep learning and computer vision. In 2018 Big Data. IEEE, 4896--4899."},{"key":"e_1_3_2_1_2_1","volume-title":"Workshop on representation and processing of sign languages, LREC","volume":"4","author":"Bungeroth Jan","year":"2004","unstructured":"Jan Bungeroth and Hermann Ney. 2004. Statistical sign language translation. In Workshop on representation and processing of sign languages, LREC, Vol. 4. Citeseer, 105--108."},{"key":"e_1_3_2_1_3_1","volume-title":"Subunets: End-to-end hand shape and continuous sign language recognition","author":"Camgoz Necati Cihan","year":"2017","unstructured":"Necati Cihan Camgoz, Simon Hadfield, Oscar Koller, and Richard Bowden. 2017. Subunets: End-to-end hand shape and continuous sign language recognition. In ICCV. IEEE, 3075--3084."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","unstructured":"N. C. Camgoz S. Hadfield O. Koller H. Ney and R. Bowden. 2018. Neural Sign Language Translation. In CVPR. 7784--7793. https:\/\/doi.org\/10.1109\/CVPR.2018.00812","DOI":"10.1109\/CVPR.2018.00812"},{"key":"e_1_3_2_1_5_1","volume-title":"Multi-channel Transformers for Multi-articulatory Sign Language Translation. arXiv preprint arXiv:2009.00299","author":"Camgoz Necati Cihan","year":"2020","unstructured":"Necati Cihan Camgoz, Oscar Koller, Simon Hadfield, and Richard Bowden. 2020a. Multi-channel Transformers for Multi-articulatory Sign Language Translation. arXiv preprint arXiv:2009.00299 (2020)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Necati Cihan Camgoz Oscar Koller Simon Hadfield and Richard Bowden. 2020b. Sign Language Transformers: Joint End-to-end Sign Language Recognition and Translation. In CVPR. 10023--10033.","DOI":"10.1109\/CVPR42600.2020.01004"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Zhe Cao Tomas Simon Shih-En Wei and Yaser Sheikh. 2017. Realtime multi-person 2d pose estimation using part affinity fields. In CVPR. 7291--7299.","DOI":"10.1109\/CVPR.2017.143"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Yutong Chen Fangyun Wei Xiao Sun Zhirong Wu and Stephen Lin. 2022a. A simple multi-modality transfer learning baseline for sign language translation. In CVPR. 5120--5130.","DOI":"10.1109\/CVPR52688.2022.00506"},{"key":"e_1_3_2_1_9_1","first-page":"17043","article-title":"Two-stream network for sign language recognition and translation","volume":"35","author":"Chen Yutong","year":"2022","unstructured":"Yutong Chen, Ronglai Zuo, Fangyun Wei, Yu Wu, Shujie Liu, and Brian Mak. 2022b. Two-stream network for sign language recognition and translation. Advances in Neural Information Processing Systems, Vol. 35 (2022), 17043--17056.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58586-0_41"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.175"},{"key":"e_1_3_2_1_12_1","first-page":"1880","article-title":"A deep neural framework for continuous sign language recognition by iterative training","volume":"21","author":"Cui Runpeng","year":"2019","unstructured":"Runpeng Cui, Hu Liu, and Changshui Zhang. 2019. A deep neural framework for continuous sign language recognition by iterative training. MM, Vol. 21, 7 (2019), 1880--1891.","journal-title":"MM"},{"key":"e_1_3_2_1_13_1","volume-title":"Repvgg: Making vgg-style convnets great again. In CVPR. 13733--13742.","author":"Ding Xiaohan","year":"2021","unstructured":"Xiaohan Ding, Xiangyu Zhang, Ningning Ma, Jungong Han, Guiguang Ding, and Jian Sun. 2021. Repvgg: Making vgg-style convnets great again. In CVPR. 13733--13742."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Shiwei Gan Yafeng Yin Zhiwei Jiang Lei Xie and Sanglu Lu. 2021. Skeleton-Aware Neural Sign Language Translation. In ACM MM. 4353--4361.","DOI":"10.1145\/3474085.3475577"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Alex Graves Santiago Fern\u00e1ndez Faustino Gomez and J\u00fcrgen Schmidhuber. 2006. Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In ICML. 369--376.","DOI":"10.1145\/1143844.1143891"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSMC.1997.625742"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2941267"},{"key":"e_1_3_2_1_18_1","volume-title":"Sign language recognition based on adaptive hmms with data augmentation","author":"Guo Dan","unstructured":"Dan Guo, Wengang Zhou, Meng Wang, and Houqiang Li. 2016. Sign language recognition based on adaptive hmms with data augmentation. In ICIP. IEEE, 2876--2880."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Aiming Hao Yuecong Min and Xilin Chen. 2021. Self-Mutual Distillation Learning for Continuous Sign Language Recognition. In ICCV. 11303--11312.","DOI":"10.1109\/ICCV48922.2021.01111"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16247"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2870740"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Jie Huang Wengang Zhou Qilin Zhang Houqiang Li and Weiping Li. 2018b. Video-based sign language recognition without temporal segmentation. In AAAI.","DOI":"10.1609\/aaai.v32i1.11903"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00219"},{"key":"e_1_3_2_1_24_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_25_1","volume-title":"Weakly supervised learning with multi-stream CNN-LSTM-HMMs to discover sequential parallelism in sign language videos. TPAMI","author":"Koller Oscar","year":"2019","unstructured":"Oscar Koller, Cihan Camgoz, Hermann Ney, and Richard Bowden. 2019. Weakly supervised learning with multi-stream CNN-LSTM-HMMs to discover sequential parallelism in sign language videos. TPAMI (2019)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2015.09.013"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Oscar Koller Hermann Ney and Richard Bowden. 2016a. Deep hand: How to train a cnn on 1 million hand images when your data is continuous and weakly labelled. In CVPR. 3793--3802.","DOI":"10.1109\/CVPR.2016.412"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Oscar Koller O Zargaran Hermann Ney and Richard Bowden. 2016b. Deep sign: Hybrid CNN-HMM for continuous sign language recognition. In BMVC.","DOI":"10.5244\/C.30.136"},{"key":"e_1_3_2_1_29_1","volume-title":"Re-sign: Re-aligned end-to-end sequence modelling with deep recurrent CNN-HMMs. In CVPR. 4297--4305.","author":"Koller Oscar","year":"2017","unstructured":"Oscar Koller, Sepehr Zargaran, and Hermann Ney. 2017. Re-sign: Re-aligned end-to-end sequence modelling with deep recurrent CNN-HMMs. In CVPR. 4297--4305."},{"key":"e_1_3_2_1_30_1","volume-title":"NIPS","volume":"33","author":"Li Dongxu","year":"2020","unstructured":"Dongxu Li, Chenchen Xu, Xin Yu, Kaihao Zhang, Benjamin Swift, Hanna Suominen, and Hongdong Li. 2020b. TSPNet: Hierarchical Feature Learning via Temporal Semantic Pyramid for Sign Language Translation. In NIPS, Vol. 33."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054316"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11604"},{"key":"e_1_3_2_1_33_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81."},{"key":"e_1_3_2_1_34_1","volume-title":"Sign language recognition with long short-term memory","author":"Liu Tao","unstructured":"Tao Liu, Wengang Zhou, and Houqiang Li. 2016. Sign language recognition with long short-term memory. In ICIP. IEEE, 2871--2875."},{"key":"e_1_3_2_1_35_1","unstructured":"Yuecong Min Aiming Hao Xiujuan Chai and Xilin Chen. 2021. Visual alignment constraint for continuous sign language recognition. In ICCV. 11542--11551."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_11"},{"key":"e_1_3_2_1_37_1","volume-title":"Graph neural networks exponentially lose expressive power for node classification. arXiv preprint arXiv:1905.10947","author":"Oono Kenta","year":"2019","unstructured":"Kenta Oono and Taiji Suzuki. 2019. Graph neural networks exponentially lose expressive power for node classification. arXiv preprint arXiv:1905.10947 (2019)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG47880.2020.00002"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Kishore Papineni Salim Roukos Todd Ward and Wei-Jing Zhu. 2002. BLEU: a method for automatic evaluation of machine translation. In ACL. 311--318.","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Junfu Pu Wengang Zhou Hezhen Hu and Houqiang Li. 2020. Boosting Continuous Sign Language Recognition via Cross Modality Augmentation. In MM. 1497--1505.","DOI":"10.1145\/3394171.3413931"},{"key":"e_1_3_2_1_41_1","first-page":"7","article-title":"Dilated Convolutional Network with Iterative Optimization for Continuous Sign Language Recognition","volume":"3","author":"Pu Junfu","year":"2018","unstructured":"Junfu Pu, Wengang Zhou, and Houqiang Li. 2018. Dilated Convolutional Network with Iterative Optimization for Continuous Sign Language Recognition.. In IJCAI, Vol. 3. 7.","journal-title":"IJCAI"},{"key":"e_1_3_2_1_42_1","unstructured":"Junfu Pu Wengang Zhou and Houqiang Li. 2019. Iterative alignment network for continuous sign language recognition. In CVPR. 4165--4174."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Ke Sun Bin Xiao Dong Liu and Jingdong Wang. 2019. Deep high-resolution representation learning for human pose estimation. In CVPR. 5693--5703.","DOI":"10.1109\/CVPR.2019.00584"},{"key":"e_1_3_2_1_44_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_45_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. In NIPS. 5998--6008."},{"key":"e_1_3_2_1_46_1","first-page":"550","article-title":"Residual networks behave like ensembles of relatively shallow networks","volume":"29","author":"Veit Andreas","year":"2016","unstructured":"Andreas Veit, Michael J Wilber, and Serge Belongie. 2016. Residual networks behave like ensembles of relatively shallow networks. NIPS, Vol. 29 (2016), 550--558.","journal-title":"NIPS"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897735"},{"key":"e_1_3_2_1_48_1","first-page":"1138","article-title":"Semantic Boundary Detection with Reinforcement Learning for Continuous Sign Language Recognition","volume":"31","author":"Wei Chengcheng","year":"2020","unstructured":"Chengcheng Wei, Jian Zhao, Wengang Zhou, and Houqiang Li. 2020. Semantic Boundary Detection with Reinforcement Learning for Continuous Sign Language Recognition. TCSVT, Vol. 31, 3 (2020), 1138--1149.","journal-title":"TCSVT"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Sijie Yan Yuanjun Xiong and Dahua Lin. 2018. Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition. In AAAI.","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Aoxiong Yin Zhou Zhao Jinglin Liu Weike Jin Meng Zhang Xingshan Zeng and Xiaofei He. 2021b. SimulSLT: End-to-End Simultaneous Sign Language Translation. In MM. 4118--4127.","DOI":"10.1145\/3474085.3475544"},{"key":"e_1_3_2_1_51_1","volume-title":"Including signed languages in natural language processing. arXiv preprint arXiv:2105.05222","author":"Yin Kayo","year":"2021","unstructured":"Kayo Yin, Amit Moryossef, Julie Hochgesang, Yoav Goldberg, and Malihe Alikhani. 2021a. Including signed languages in natural language processing. arXiv preprint arXiv:2105.05222 (2021)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Kayo Yin and Jesse Read. 2020. Better sign language translation with STMC-transformer. In COLING. 5975--5989.","DOI":"10.18653\/v1\/2020.coling-main.525"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"Jihai Zhang Wengang Zhou and Houqiang Li. 2014. A threshold-based hmm-dtw approach for continuous sign language recognition. In ICIMCS. 237--240.","DOI":"10.1145\/2632856.2632931"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00223"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"crossref","unstructured":"Hao Zhou Wengang Zhou Weizhen Qi Junfu Pu and Houqiang Li. 2021a. Improving Sign Language Translation with Monolingual Data by Sign Back-Translation. In CVPR. 1316--1325.","DOI":"10.1109\/CVPR46437.2021.00137"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Hao Zhou Wengang Zhou Yun Zhou and Houqiang Li. 2020. Spatial-Temporal Multi-Cue Network for Continuous Sign Language Recognition.. In AAAI. 13009--13016.","DOI":"10.1609\/aaai.v34i07.7001"},{"key":"e_1_3_2_1_57_1","volume-title":"Spatial-temporal multi-cue network for sign language recognition and translation. TMC","author":"Zhou Hao","year":"2021","unstructured":"Hao Zhou, Wengang Zhou, Yun Zhou, and Houqiang Li. 2021b. Spatial-temporal multi-cue network for sign language recognition and translation. TMC (2021)."},{"key":"e_1_3_2_1_58_1","unstructured":"Ronglai Zuo and Brian Mak. 2022. C2SLR: Consistency-Enhanced Continuous Sign Language Recognition. In CVPR. 5131--5140."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611820","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611820","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T23:55:30Z","timestamp":1755820530000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611820"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":58,"alternative-id":["10.1145\/3581783.3611820","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611820","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}