{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T16:10:18Z","timestamp":1781799018546,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["BK20180325, BK20190293"],"award-info":[{"award-number":["BK20180325, BK20190293"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Collaborative Innovation Center of Novel Software Technology and Industrialization"},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61802169, 61872174, 61832008, 61906085, 61902175"],"award-info":[{"award-number":["61802169, 61872174, 61832008, 61906085, 61902175"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key R&D Program of China","award":["2018AAA0102302"],"award-info":[{"award-number":["2018AAA0102302"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3475577","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T05:40:18Z","timestamp":1634535618000},"page":"4353-4361","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":25,"title":["Skeleton-Aware Neural Sign Language Translation"],"prefix":"10.1145","author":[{"given":"Shiwei","family":"Gan","sequence":"first","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yafeng","family":"Yin","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhiwei","family":"Jiang","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lei","family":"Xie","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sanglu","family":"Lu","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473","author":"Bahdanau Dzmitry","year":"2014","unstructured":"Dzmitry Bahdanau , Kyunghyun Cho , and Yoshua Bengio . 2014. Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 ( 2014 ). Dzmitry Bahdanau, Kyunghyun Cho, and Yoshua Bengio. 2014. Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8622141"},{"key":"e_1_3_2_1_3_1","volume-title":"Workshop on representation and processing of sign languages, LREC","volume":"4","author":"Bungeroth Jan","year":"2004","unstructured":"Jan Bungeroth and Hermann Ney . 2004 . Statistical sign language translation . In Workshop on representation and processing of sign languages, LREC , Vol. 4 . Citeseer, 105--108. Jan Bungeroth and Hermann Ney. 2004. Statistical sign language translation. In Workshop on representation and processing of sign languages, LREC, Vol. 4. Citeseer, 105--108."},{"key":"e_1_3_2_1_4_1","volume-title":"Subunets: End-to-end hand shape and continuous sign language recognition","author":"Camgoz Necati Cihan","year":"2017","unstructured":"Necati Cihan Camgoz , Simon Hadfield , Oscar Koller , and Richard Bowden . 2017 . Subunets: End-to-end hand shape and continuous sign language recognition . In ICCV. IEEE , 3075--3084. Necati Cihan Camgoz, Simon Hadfield, Oscar Koller, and Richard Bowden. 2017. Subunets: End-to-end hand shape and continuous sign language recognition. In ICCV. IEEE, 3075--3084."},{"key":"#cr-split#-e_1_3_2_1_5_1.1","doi-asserted-by":"crossref","unstructured":"N. C. Camgoz S. Hadfield O. Koller H. Ney and R. Bowden. 2018. Neural Sign Language Translation. In CVPR. 7784--7793. https:\/\/doi.org\/10.1109\/CVPR.2018.00812 10.1109\/CVPR.2018.00812","DOI":"10.1109\/CVPR.2018.00812"},{"key":"#cr-split#-e_1_3_2_1_5_1.2","doi-asserted-by":"crossref","unstructured":"N. C. Camgoz S. Hadfield O. Koller H. Ney and R. Bowden. 2018. Neural Sign Language Translation. In CVPR. 7784--7793. https:\/\/doi.org\/10.1109\/CVPR.2018.00812","DOI":"10.1109\/CVPR.2018.00812"},{"key":"e_1_3_2_1_6_1","volume-title":"2020 a. Multi-channel Transformers for Multi-articulatory Sign Language Translation. arXiv preprint arXiv:2009.00299","author":"Camgoz Necati Cihan","year":"2020","unstructured":"Necati Cihan Camgoz , Oscar Koller , Simon Hadfield , and Richard Bowden . 2020 a. Multi-channel Transformers for Multi-articulatory Sign Language Translation. arXiv preprint arXiv:2009.00299 ( 2020 ). Necati Cihan Camgoz, Oscar Koller, Simon Hadfield, and Richard Bowden. 2020 a. Multi-channel Transformers for Multi-articulatory Sign Language Translation. arXiv preprint arXiv:2009.00299 (2020)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Necati Cihan Camgoz Oscar Koller Simon Hadfield and Richard Bowden. 2020 b. Sign Language Transformers: Joint End-to-end Sign Language Recognition and Translation. In CVPR. 10023--10033.  Necati Cihan Camgoz Oscar Koller Simon Hadfield and Richard Bowden. 2020 b. Sign Language Transformers: Joint End-to-end Sign Language Recognition and Translation. In CVPR. 10023--10033.","DOI":"10.1109\/CVPR42600.2020.01004"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Zhe Cao Tomas Simon Shih-En Wei and Yaser Sheikh. 2017. Realtime multi-person 2d pose estimation using part affinity fields. In CVPR. 7291--7299.  Zhe Cao Tomas Simon Shih-En Wei and Yaser Sheikh. 2017. Realtime multi-person 2d pose estimation using part affinity fields. In CVPR. 7291--7299.","DOI":"10.1109\/CVPR.2017.143"},{"key":"e_1_3_2_1_9_1","volume-title":"IEEE Conf. on AFGR","volume":"655","author":"Chai Xiujuan","year":"2013","unstructured":"Xiujuan Chai , Guang Li , Yushun Lin , Zhihao Xu , Yili Tang , Xilin Chen , and Ming Zhou . 2013 . Sign language recognition and translation with kinect . In IEEE Conf. on AFGR , Vol. 655 . 4. Xiujuan Chai, Guang Li, Yushun Lin, Zhihao Xu, Yili Tang, Xilin Chen, and Ming Zhou. 2013. Sign language recognition and translation with kinect. In IEEE Conf. on AFGR, Vol. 655. 4."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2889563"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00276"},{"key":"e_1_3_2_1_12_1","first-page":"162","article-title":"Isolated sign language recognition using hidden Markov models","volume":"1","author":"Grobel K.","year":"1997","unstructured":"K. Grobel and M. Assan . 1997 . Isolated sign language recognition using hidden Markov models . In SMC , Vol. 1. 162 -- 167 vol.1. https:\/\/doi.org\/10.1109\/ICSMC.1997.625742 10.1109\/ICSMC.1997.625742 K. Grobel and M. Assan. 1997. Isolated sign language recognition using hidden Markov models. In SMC, Vol. 1. 162--167 vol.1. https:\/\/doi.org\/10.1109\/ICSMC.1997.625742","journal-title":"SMC"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/3367032.3367139"},{"key":"e_1_3_2_1_14_1","first-page":"1575","article-title":"b. Hierarchical recurrent deep fusion using adaptive clip summarization for sign language translation","volume":"29","author":"Guo Dan","year":"2019","unstructured":"Dan Guo , Wengang Zhou , Anyang Li , Houqiang Li , and Meng Wang . 2019 b. Hierarchical recurrent deep fusion using adaptive clip summarization for sign language translation . TIP , Vol. 29 (2019), 1575 -- 1590 . Dan Guo, Wengang Zhou, Anyang Li, Houqiang Li, and Meng Wang. 2019 b. Hierarchical recurrent deep fusion using adaptive clip summarization for sign language translation. TIP, Vol. 29 (2019), 1575--1590.","journal-title":"TIP"},{"key":"e_1_3_2_1_15_1","volume-title":"AAAI","volume":"32","author":"Guo Dan","year":"2018","unstructured":"Dan Guo , Wengang Zhou , Houqiang Li , and Meng Wang . 2018 . Hierarchical lstm for sign language translation . In AAAI , Vol. 32 . Dan Guo, Wengang Zhou, Houqiang Li, and Meng Wang. 2018. Hierarchical lstm for sign language translation. In AAAI, Vol. 32."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2016.7532885"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2870740"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Jie Huang Wengang Zhou Qilin Zhang Houqiang Li and Weiping Li. 2018b. Video-based sign language recognition without temporal segmentation. In AAAI.  Jie Huang Wengang Zhou Qilin Zhang Houqiang Li and Weiping Li. 2018b. Video-based sign language recognition without temporal segmentation. In AAAI.","DOI":"10.1609\/aaai.v32i1.11903"},{"key":"e_1_3_2_1_19_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba . 2014 . Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014). Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_20_1","volume-title":"Weakly supervised learning with multi-stream CNN-LSTM-HMMs to discover sequential parallelism in sign language videos. TPAMI","author":"Koller Oscar","year":"2019","unstructured":"Oscar Koller , Cihan Camgoz , Hermann Ney , and Richard Bowden . 2019. Weakly supervised learning with multi-stream CNN-LSTM-HMMs to discover sequential parallelism in sign language videos. TPAMI ( 2019 ). Oscar Koller, Cihan Camgoz, Hermann Ney, and Richard Bowden. 2019. Weakly supervised learning with multi-stream CNN-LSTM-HMMs to discover sequential parallelism in sign language videos. TPAMI (2019)."},{"key":"e_1_3_2_1_21_1","volume-title":"Re-sign: Re-aligned end-to-end sequence modelling with deep recurrent CNN-HMMs. In CVPR. 4297--4305.","author":"Koller Oscar","year":"2017","unstructured":"Oscar Koller , Sepehr Zargaran , and Hermann Ney . 2017 . Re-sign: Re-aligned end-to-end sequence modelling with deep recurrent CNN-HMMs. In CVPR. 4297--4305. Oscar Koller, Sepehr Zargaran, and Hermann Ney. 2017. Re-sign: Re-aligned end-to-end sequence modelling with deep recurrent CNN-HMMs. In CVPR. 4297--4305."},{"key":"e_1_3_2_1_22_1","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Li Dongxu","year":"2020","unstructured":"Dongxu Li , Chenchen Xu , Xin Yu , Kaihao Zhang , Benjamin Swift , Hanna Suominen , and Hongdong Li . 2020 . TSPNet: Hierarchical Feature Learning via Temporal Semantic Pyramid for Sign Language Translation . In Advances in Neural Information Processing Systems , Vol. 33 . Dongxu Li, Chenchen Xu, Xin Yu, Kaihao Zhang, Benjamin Swift, Hanna Suominen, and Hongdong Li. 2020. TSPNet: Hierarchical Feature Learning via Temporal Semantic Pyramid for Sign Language Translation. In Advances in Neural Information Processing Systems, Vol. 33."},{"key":"e_1_3_2_1_23_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin . 2004 . Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81. Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81."},{"key":"e_1_3_2_1_24_1","volume-title":"Sign language recognition with long short-term memory","author":"Liu Tao","unstructured":"Tao Liu , Wengang Zhou , and Houqiang Li. 2016. Sign language recognition with long short-term memory . In ICIP. IEEE , 2871--2875. Tao Liu, Wengang Zhou, and Houqiang Li. 2016. Sign language recognition with long short-term memory. In ICIP. IEEE, 2871--2875."},{"key":"e_1_3_2_1_25_1","volume-title":"Neural sign language translation by learning tokenization. arXiv preprint arXiv:2002.00479","author":"Orbay Alptekin","year":"2020","unstructured":"Alptekin Orbay and Lale Akarun . 2020. Neural sign language translation by learning tokenization. arXiv preprint arXiv:2002.00479 ( 2020 ). Alptekin Orbay and Lale Akarun. 2020. Neural sign language translation by learning tokenization. arXiv preprint arXiv:2002.00479 (2020)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.365"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413931"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00429"},{"key":"e_1_3_2_1_30_1","unstructured":"Zhaofan Qiu Ting Yao and Tao Mei. 2017. Learning spatio-temporal representation with pseudo-3d residual networks. In CVPR. 5533--5541.  Zhaofan Qiu Ting Yao and Tao Mei. 2017. Learning spatio-temporal representation with pseudo-3d residual networks. In CVPR. 5533--5541."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00810"},{"key":"e_1_3_2_1_32_1","volume-title":"Two-stream convolutional networks for action recognition in videos. arXiv preprint arXiv:1406.2199","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman . 2014a. Two-stream convolutional networks for action recognition in videos. arXiv preprint arXiv:1406.2199 ( 2014 ). Karen Simonyan and Andrew Zisserman. 2014a. Two-stream convolutional networks for action recognition in videos. arXiv preprint arXiv:1406.2199 (2014)."},{"key":"e_1_3_2_1_33_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman . 2014b. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 ( 2014 ). Karen Simonyan and Andrew Zisserman. 2014b. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Ke Sun Bin Xiao Dong Liu and Jingdong Wang. 2019. Deep high-resolution representation learning for human pose estimation. In CVPR. 5693--5703.  Ke Sun Bin Xiao Dong Liu and Jingdong Wang. 2019. Deep high-resolution representation learning for human pose estimation. In CVPR. 5693--5703.","DOI":"10.1109\/CVPR.2019.00584"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969173"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2735952"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2011.6130267"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Sijie Yan Yuanjun Xiong and Dahua Lin. 2018. Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition. In AAAI.  Sijie Yan Yuanjun Xiong and Dahua Lin. 2018. Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition. In AAAI.","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_45"},{"key":"e_1_3_2_1_43_1","volume-title":"SF-Net: Structured Feature Network for Continuous Sign Language Recognition. arXiv preprint arXiv:1908.01341","author":"Yang Zhaoyang","year":"2019","unstructured":"Zhaoyang Yang , Zhenmei Shi , Xiaoyong Shen , and Yu-Wing Tai . 2019. SF-Net: Structured Feature Network for Continuous Sign Language Recognition. arXiv preprint arXiv:1908.01341 ( 2019 ). Zhaoyang Yang, Zhenmei Shi, Xiaoyong Shen, and Yu-Wing Tai. 2019. SF-Net: Structured Feature Network for Continuous Sign Language Recognition. arXiv preprint arXiv:1908.01341 (2019)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2632856.2632931"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Hao Zhou Wengang Zhou Yun Zhou and Houqiang Li. 2020. Spatial-Temporal Multi-Cue Network for Continuous Sign Language Recognition.. In AAAI. 13009--13016.  Hao Zhou Wengang Zhou Yun Zhou and Houqiang Li. 2020. Spatial-Temporal Multi-Cue Network for Continuous Sign Language Recognition.. In AAAI. 13009--13016.","DOI":"10.1609\/aaai.v34i07.7001"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3059098"}],"event":{"name":"MM '21: ACM Multimedia Conference","location":"Virtual Event China","acronym":"MM '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475577","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3475577","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:49:11Z","timestamp":1750193351000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475577"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":47,"alternative-id":["10.1145\/3474085.3475577","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3475577","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}