{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,12]],"date-time":"2025-09-12T17:59:53Z","timestamp":1757699993131,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"This work was supported in part by the National Natural Science Foundation of China under Grants (62071067, 62201072, 62171057, 62101064, 62001054), China Postdoctoral Science Foundation under Grant 2022M710468, Beijing University of Posts and Telecommunications-China Mobile Research Institute Joint Innovation Center, BUPT innovation and entrepreneurship support program.","award":["62071067, 62201072, 62171057, 62101064, 62001054, 2022M710468"],"award-info":[{"award-number":["62071067, 62201072, 62171057, 62101064, 62001054, 2022M710468"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3587429","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:51Z","timestamp":1682551851000},"page":"3883-3891","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["SA-Fusion: Multimodal Fusion Approach for Web-based Human-Computer Interaction in the Wild"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-4955-1934","authenticated-orcid":false,"given":"Xingyu","family":"Liu","sequence":"first","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1691-6457","authenticated-orcid":false,"given":"Pengfei","family":"Ren","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6930-7209","authenticated-orcid":false,"given":"Yuchen","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0184-5965","authenticated-orcid":false,"given":"Cong","family":"Liu","sequence":"additional","affiliation":[{"name":"China Mobile Research Institute, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4627-6307","authenticated-orcid":false,"given":"Jing","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3072-7422","authenticated-orcid":false,"given":"Haifeng","family":"Sun","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0829-4624","authenticated-orcid":false,"given":"Qi","family":"Qi","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2182-2228","authenticated-orcid":false,"given":"Jingyu","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, China"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Designing Novel Interactions: 11th International Conference, UAHCI 2017, Held as Part of HCI International 2017, Vancouver, BC, Canada, July 9\u201314, 2017, Proceedings, Part II 11","author":"Barone\u00a0Rodrigues Andr\u00e9","year":"2017","unstructured":"Andr\u00e9 Barone\u00a0Rodrigues, Diego R\u00a0Colombo Dias, Val\u00e9ria\u00a0Farinazzo Martins, Paulo\u00a0Alexandre Bressan, and Marcelo de Paiva\u00a0Guimar\u00e3es. 2017. WebAR: A web-augmented reality-based authoring tool with experience API support for educational applications. In Universal Access in Human\u2013Computer Interaction. Designing Novel Interactions: 11th International Conference, UAHCI 2017, Held as Part of HCI International 2017, Vancouver, BC, Canada, July 9\u201314, 2017, Proceedings, Part II 11. Springer, 118\u2013128."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00700"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00893"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_33"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01307"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2863540"},{"key":"e_1_3_2_1_7_1","volume-title":"Joint hand detection and rotation estimation using CNN","author":"Deng Xiaoming","year":"2017","unstructured":"Xiaoming Deng, Yinda Zhang, Shuo Yang, Ping Tan, Liang Chang, Ye Yuan, and Hongan Wang. 2017. Joint hand detection and rotation estimation using CNN. IEEE transactions on image processing 27, 4 (2017), 1888\u20131900."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00664"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01013"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_8"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.02.066"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01109"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_29"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00152"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10584-0_23"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01081"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01208"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00065"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803025"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings, Part XXV 16","author":"Huang Lin","year":"2020","unstructured":"Lin Huang, Jianchao Tan, Ji Liu, and Junsong Yuan. 2020. Hand-transformer: non-autoregressive structured modeling for 3d hand pose estimation. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXV 16. Springer, 17\u201333."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6761"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the European Conference on Computer Vision (ECCV). 118\u2013134","author":"Iqbal Umar","year":"2018","unstructured":"Umar Iqbal, Pavlo Molchanov, Thomas Breuel\u00a0Juergen Gall, and Jan Kautz. 2018. Hand pose estimation via latent 2.5 d heatmap regression. In Proceedings of the European Conference on Computer Vision (ECCV). 118\u2013134."},{"key":"e_1_3_2_1_26_1","volume-title":"ultralytics\/yolov5: v5. 0-YOLOv5-P6 1280 models AWS Supervise. ly and YouTube integrations. Zenodo 11","author":"Jocher Glenn","year":"2021","unstructured":"Glenn Jocher, Alex Stoken, Jirka Borovec, Ayush Chaurasia, Liu Changyu, A Laughing, A Hogan, J Hajek, L Diaconu, Y Marc, 2021. ultralytics\/yolov5: v5. 0-YOLOv5-P6 1280 models AWS Supervise. ly and YouTube integrations. Zenodo 11 (2021)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00504"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00199"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01270"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01445"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449890"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00714"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3122874"},{"key":"e_1_3_2_1_34_1","unstructured":"Arpit Mittal Andrew Zisserman and Philip\u00a0HS Torr. 2011. Hand detection using multiple proposals.. In Bmvc Vol.\u00a02. 5."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_44"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00966"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.3390\/s19040866"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00155"},{"key":"e_1_3_2_1_39_1","volume-title":"SRN: Stacked Regression Network for Real-time 3D Hand Pose Estimation.. In BMVC. 112.","author":"Ren Pengfei","year":"2019","unstructured":"Pengfei Ren, Haifeng Sun, Qi Qi, Jingyu Wang, and Weiting Huang. 2019. SRN: Stacked Regression Network for Real-time 3D Hand Pose Estimation.. In BMVC. 112."},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings, Part XVII 16","author":"Spurr Adrian","year":"2020","unstructured":"Adrian Spurr, Umar Iqbal, Pavlo Molchanov, Otmar Hilliges, and Jan Kautz. 2020. Weakly supervised 3d hand pose estimation via biomechanical constraints. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XVII 16. Springer, 211\u2013228."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3569499"},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 1664\u20131674","author":"Ho\u00a0Elden Tse Tze","year":"2022","unstructured":"Tze Ho\u00a0Elden Tse, Kwang\u00a0In Kim, Ales Leonardis, and Hyung\u00a0Jin Chang. 2022. Collaborative learning for hand and object reconstruction with attention-guided graph convolution. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 1664\u20131674."},{"key":"e_1_3_2_1_43_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_44_1","volume-title":"Deep multimodal fusion by channel exchanging. Advances in neural information processing systems 33","author":"Wang Yikai","year":"2020","unstructured":"Yikai Wang, Wenbing Huang, Fuchun Sun, Tingyang Xu, Yu Rong, and Junzhou Huang. 2020. Deep multimodal fusion by channel exchanging. Advances in neural information processing systems 33 (2020), 4835\u20134845."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"e_1_3_2_1_46_1","volume-title":"Proceedings of Asian Conference on Computer Vision. 1106\u20131111","author":"Wu Ying","year":"2000","unstructured":"Ying Wu, Qiong Liu, and Thomas\u00a0S Huang. 2000. An adaptive self-organizing color segmentation algorithm with application to robust real-time human hand localization. In Proceedings of Asian Conference on Computer Vision. 1106\u20131111."},{"key":"e_1_3_2_1_47_1","volume-title":"Proceedings of the Asian Conference on Computer Vision.","author":"Wu Zongwei","year":"2020","unstructured":"Zongwei Wu, Guillaume Allibert, Christophe Stolz, and C\u00e9dric Demonceaux. 2020. Depth-adapted CNN for RGB-D cameras. In Proceedings of the Asian Conference on Computer Vision."},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings, Part XIX 16","author":"Xing Yajie","year":"2020","unstructured":"Yajie Xing, Jingbo Wang, and Gang Zeng. 2020. Malleable 2.5 d convolution: Learning receptive fields along the depth-axis for rgb-d scene parsing. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XIX 16. Springer, 555\u2013571."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00088"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.3390\/s20216360"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR52148.2021.00024"},{"key":"e_1_3_2_1_52_1","volume-title":"Sixth IEEE International Conference on Automatic Face and Gesture Recognition, 2004. Proceedings. IEEE, 37\u201342","author":"Zhu Qiang","year":"2004","unstructured":"Qiang Zhu, Kwang-Ting Cheng, Ching-Tung Wu, and Yi-Leh Wu. 2004. Adaptive learning of an accurate skin-color model. In Sixth IEEE International Conference on Automatic Face and Gesture Recognition, 2004. Proceedings. IEEE, 37\u201342."}],"event":{"name":"WWW '23: The ACM Web Conference 2023","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Austin TX USA","acronym":"WWW '23"},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3587429","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3587429","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:00:47Z","timestamp":1750186847000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3587429"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":52,"alternative-id":["10.1145\/3543507.3587429","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3587429","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}