{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T12:40:12Z","timestamp":1755780012947,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,7]],"date-time":"2023-12-07T00:00:00Z","timestamp":1701907200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NUS Resilience & Growth Postdoctoral Fellowship","award":["A-0000065-99-00"],"award-info":[{"award-number":["A-0000065-99-00"]}]},{"name":"Vingroup Innovation Foundation (VINIF)","award":["VINIF.2022.DA00183"],"award-info":[{"award-number":["VINIF.2022.DA00183"]}]},{"name":"NUS Fellows Programme (Southeast Asia)"},{"name":"Singapore MOE AcRF Tier 1 ? FRC Funding","award":["A-0009473-01-00"],"award-info":[{"award-number":["A-0009473-01-00"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,7]]},"DOI":"10.1145\/3628797.3629017","type":"proceedings-article","created":{"date-parts":[[2023,12,6]],"date-time":"2023-12-06T15:25:34Z","timestamp":1701876334000},"page":"485-492","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Boosting Facial Landmark Detection via Self-supervised and Semi-supervised Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-5531-4269","authenticated-orcid":false,"given":"Chau","family":"Nguyen Minh","sequence":"first","affiliation":[{"name":"Hanoi University of Science and Technology, Viet Nam"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5897-1939","authenticated-orcid":false,"given":"Toan","family":"Nguyen Ngoc","sequence":"additional","affiliation":[{"name":"Hanoi University of Science and Technology, Viet Nam"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9620-8780","authenticated-orcid":false,"given":"Tuyen","family":"Le Dinh","sequence":"additional","affiliation":[{"name":"Hanoi University of Science and Technology, Viet Nam"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9254-1327","authenticated-orcid":false,"given":"Sang","family":"Dinh Viet","sequence":"additional","affiliation":[{"name":"Hanoi University of Science and Technology, Viet Nam"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1004-3405","authenticated-orcid":false,"given":"Pooi-Mun","family":"Wong","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0566-6069","authenticated-orcid":false,"given":"Chin-Boon","family":"Chng","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9463-4781","authenticated-orcid":false,"given":"Chee-Kong","family":"Chui","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore"}]}],"member":"320","published-online":{"date-parts":[[2023,12,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Mykhaylo Andriluka Leonid Pishchulin Peter Gehler and Bernt Schiele. 2014. 2d human pose estimation: New benchmark and state of the art analysis. In CVPR. 3686\u20133693.","DOI":"10.1109\/CVPR.2014.471"},{"key":"e_1_3_2_1_2_1","volume-title":"Surf: Speeded up robust features","author":"Bay Herbert","year":"2006","unstructured":"Herbert Bay, Tinne Tuytelaars, and Luc Van\u00a0Gool. 2006. Surf: Speeded up robust features. In ECCV. Springer, 404\u2013417."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Zhaowei Cai Avinash Ravichandran Subhransu Maji Charless Fowlkes Zhuowen Tu and Stefano Soatto. 2021. Exponential moving average normalization for self-supervised and semi-supervised learning. In CVPR. 194\u2013203.","DOI":"10.1109\/CVPR46437.2021.00026"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2021.3075644"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Zhe Cao Tomas Simon Shih-En Wei and Yaser Sheikh. 2017. Realtime multi-person 2d pose estimation using part affinity fields. In CVPR. 7291\u20137299.","DOI":"10.1109\/CVPR.2017.143"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Xiaokang Chen Yuhui Yuan Gang Zeng and Jingdong Wang. 2021. Semi-supervised semantic segmentation with cross pseudo supervision. In CVPR. 2613\u20132622.","DOI":"10.1109\/CVPR46437.2021.00264"},{"key":"e_1_3_2_1_7_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"Pseudo-Labeling Based Practical Semi-Supervised Meta-Training for Few-Shot Learning. arXiv preprint arXiv:2207.06817","author":"Dong Xingping","year":"2022","unstructured":"Xingping Dong, Ling Shao, and Shengcai Liao. 2022. Pseudo-Labeling Based Practical Semi-Supervised Meta-Training for Few-Shot Learning. arXiv preprint arXiv:2207.06817 (2022)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Zhen-Hua Feng Josef Kittler Muhammad Awais Patrik Huber and Xiao-Jun Wu. 2018. Wing loss for robust facial landmark localisation with convolutional neural networks. In CVPR. 2235\u20132245.","DOI":"10.1109\/CVPR.2018.00238"},{"key":"e_1_3_2_1_10_1","volume-title":"PFLD: A practical facial landmark detector. arXiv preprint arXiv:1902.10859","author":"Guo Xiaojie","year":"2019","unstructured":"Xiaojie Guo, Siyuan Li, Jinke Yu, Jiawan Zhang, Jiayi Ma, Lin Ma, Wei Liu, and Haibin Ling. 2019. PFLD: A practical facial landmark detector. arXiv preprint arXiv:1902.10859 (2019)."},{"key":"e_1_3_2_1_11_1","unstructured":"Kaiming He Xinlei Chen Saining Xie Yanghao Li Piotr Doll\u00e1r and Ross Girshick. 2022. Masked autoencoders are scalable vision learners. In CVPR. 16000\u201316009."},{"key":"e_1_3_2_1_12_1","volume-title":"Workshop on faces in\u2019Real-Life\u2019Images: detection, alignment, and recognition.","author":"Huang B","year":"2008","unstructured":"Gary\u00a0B Huang, Marwan Mattar, Tamara Berg, and Eric Learned-Miller. 2008. Labeled faces in the wild: A database forstudying face recognition in unconstrained environments. In Workshop on faces in\u2019Real-Life\u2019Images: detection, alignment, and recognition."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2011.6130513"},{"key":"e_1_3_2_1_14_1","unstructured":"Jiefeng Li Siyuan Bian Ailing Zeng Can Wang Bo Pang Wentao Liu and Cewu Lu. 2021. Human pose regression with residual log-likelihood estimation. In ICCV. 11025\u201311034."},{"key":"e_1_3_2_1_15_1","unstructured":"Tsung-Yi Lin Michael Maire Serge Belongie Lubomir Bourdev Ross Girshick James Hays Pietro Perona Deva Ramanan C.\u00a0Lawrence Zitnick and Piotr Doll\u00e1r. 2015. Microsoft COCO: Common Objects in Context. arxiv:1405.0312\u00a0[cs.CV]"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Tony Lindeberg. 2012. Scale invariant feature transform. (2012).","DOI":"10.4249\/scholarpedia.10491"},{"key":"e_1_3_2_1_17_1","unstructured":"Yuyuan Liu Yu Tian Yuanhong Chen Fengbei Liu Vasileios Belagiannis and Gustavo Carneiro. 2022. Perturbed and strict mean teachers for semi-supervised semantic segmentation. In CVPR. 4258\u20134267."},{"volume-title":"Stacked hourglass networks for human pose estimation","author":"Newell Alejandro","key":"e_1_3_2_1_18_1","unstructured":"Alejandro Newell, Kaiyu Yang, and Jia Deng. 2016. Stacked hourglass networks for human pose estimation. In ECCV. Springer, 483\u2013499."},{"key":"e_1_3_2_1_19_1","unstructured":"Chinese\u00a0Academy of\u00a0Sciences\u2019 Institute\u00a0of Automation.2004. CASIA-3D facev1 dataset. http:\/\/biometrics.idealtest.org\/ CASIA-3D facev1 dataset."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Yassine Ouali C\u00e9line Hudelot and Myriam Tami. 2020. Semi-supervised semantic segmentation with cross-consistency training. In CVPR. 12674\u201312684.","DOI":"10.1109\/CVPR42600.2020.01269"},{"key":"e_1_3_2_1_21_1","volume-title":"300 faces in-the-wild challenge: Database and results. Image and vision computing 47","author":"Sagonas Christos","year":"2016","unstructured":"Christos Sagonas, Epameinondas Antonakos, Georgios Tzimiropoulos, Stefanos Zafeiriou, and Maja Pantic. 2016. 300 faces in-the-wild challenge: Database and results. Image and vision computing 47 (2016), 3\u201318."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.59"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2013.132"},{"volume-title":"Frontal to profile face verification in the wild","author":"Sengupta Soumyadip","key":"e_1_3_2_1_24_1","unstructured":"Soumyadip Sengupta, Jun-Cheng Chen, Carlos Castillo, Vishal\u00a0M Patel, Rama Chellappa, and David\u00a0W Jacobs. 2016. Frontal to profile face verification in the wild. In WACV. IEEE, 1\u20139."},{"key":"e_1_3_2_1_25_1","volume-title":"An image is worth 16x16 words, what is a video worth?arXiv preprint arXiv:2103.13915","author":"Sharir Gilad","year":"2021","unstructured":"Gilad Sharir, Asaf Noy, and Lihi Zelnik-Manor. 2021. An image is worth 16x16 words, what is a video worth?arXiv preprint arXiv:2103.13915 (2021)."},{"key":"e_1_3_2_1_26_1","first-page":"596","article-title":"Fixmatch: Simplifying semi-supervised learning with consistency and confidence","volume":"33","author":"Sohn Kihyuk","year":"2020","unstructured":"Kihyuk Sohn, David Berthelot, Nicholas Carlini, Zizhao Zhang, Han Zhang, Colin\u00a0A Raffel, Ekin\u00a0Dogus Cubuk, Alexey Kurakin, and Chun-Liang Li. 2020. Fixmatch: Simplifying semi-supervised learning with consistency and confidence. NeurIPS 33 (2020), 596\u2013608.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Ke Sun Bin Xiao Dong Liu and Jingdong Wang. 2019. Deep high-resolution representation learning for human pose estimation. In CVPR. 5693\u20135703.","DOI":"10.1109\/CVPR.2019.00584"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Xiao Sun Jiaxiang Shang Shuang Liang and Yichen Wei. 2017. Compositional human pose regression. In ICCV. 2602\u20132611.","DOI":"10.1109\/ICCV.2017.284"},{"key":"e_1_3_2_1_29_1","volume-title":"Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results. NeurIPS 30","author":"Tarvainen Antti","year":"2017","unstructured":"Antti Tarvainen and Harri Valpola. 2017. Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results. NeurIPS 30 (2017)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Xinyao Wang Liefeng Bo and Li Fuxin. 2019. Adaptive wing loss for robust face alignment via heatmap regression. In ICCV. 6971\u20136981.","DOI":"10.1109\/ICCV.2019.00707"},{"key":"e_1_3_2_1_31_1","unstructured":"Shih-En Wei Varun Ramakrishna Takeo Kanade and Yaser Sheikh. 2016. Convolutional pose machines. In CVPR. 4724\u20134732."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2017.2737827"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.05.072"},{"key":"e_1_3_2_1_34_1","first-page":"38571","article-title":"Vitpose: Simple vision transformer baselines for human pose estimation","volume":"35","author":"Xu Yufei","year":"2022","unstructured":"Yufei Xu, Jing Zhang, Qiming Zhang, and Dacheng Tao. 2022. Vitpose: Simple vision transformer baselines for human pose estimation. NeurIPS 35 (2022), 38571\u201338584.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_35_1","volume-title":"Hrformer: High-resolution transformer for dense prediction. arXiv preprint arXiv:2110.09408","author":"Yuan Yuhui","year":"2021","unstructured":"Yuhui Yuan, Rao Fu, Lang Huang, Weihong Lin, Chao Zhang, Xilin Chen, and Jingdong Wang. 2021. Hrformer: High-resolution transformer for dense prediction. arXiv preprint arXiv:2110.09408 (2021)."},{"key":"e_1_3_2_1_36_1","unstructured":"Tianyue Zheng and Weihong Deng. 2018. Cross-pose lfw: A database for studying cross-pose face recognition in unconstrained environments. Beijing University of Posts and Telecommunications Tech. Rep 5 7 (2018)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Ren Zhiyao Dinh Sang Wong Pooi-Mun Chng Chin-Boon Too Joan Foong Theng-Wai Loh Will and Chui Chee-Kong. 2023. G2LCPS: End-to-end Semi-supervised Landmark Prediction with Global-to-local Cross Pseudo Supervision for Airway Difficulty Assessment. (2023).","DOI":"10.1016\/j.compbiomed.2024.109246"},{"key":"e_1_3_2_1_38_1","unstructured":"Shizhan Zhu Cheng Li Chen-Change Loy and Xiaoou Tang. 2016. Unconstrained face alignment via cascaded compositional learning. In CVPR. 3409\u20133417."},{"key":"e_1_3_2_1_39_1","volume-title":"Face Alignment Across Large Poses: A 3D Solution. CoRR abs\/1511.07212","author":"Zhu Xiangyu","year":"2015","unstructured":"Xiangyu Zhu, Zhen Lei, Xiaoming Liu, Hailin Shi, and Stan\u00a0Z. Li. 2015. Face Alignment Across Large Poses: A 3D Solution. CoRR abs\/1511.07212 (2015). arxiv:1511.07212http:\/\/arxiv.org\/abs\/1511.07212"},{"key":"e_1_3_2_1_40_1","volume-title":"Pseudoseg: Designing pseudo labels for semantic segmentation. arXiv preprint arXiv:2010.09713","author":"Zou Yuliang","year":"2020","unstructured":"Yuliang Zou, Zizhao Zhang, Han Zhang, Chun-Liang Li, Xiao Bian, Jia-Bin Huang, and Tomas Pfister. 2020. Pseudoseg: Designing pseudo labels for semantic segmentation. arXiv preprint arXiv:2010.09713 (2020)."}],"event":{"name":"SOICT 2023: The 12th International Symposium on Information and Communication Technology","acronym":"SOICT 2023","location":"Ho Chi Minh Vietnam"},"container-title":["Proceedings of the 12th International Symposium on Information and Communication Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3628797.3629017","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3628797.3629017","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T12:24:38Z","timestamp":1755779078000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3628797.3629017"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,7]]},"references-count":40,"alternative-id":["10.1145\/3628797.3629017","10.1145\/3628797"],"URL":"https:\/\/doi.org\/10.1145\/3628797.3629017","relation":{},"subject":[],"published":{"date-parts":[[2023,12,7]]},"assertion":[{"value":"2023-12-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}