{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:48:57Z","timestamp":1742914137733,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":31,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819784981"},{"type":"electronic","value":"9789819784998"}],"license":[{"start":{"date-parts":[[2024,10,19]],"date-time":"2024-10-19T00:00:00Z","timestamp":1729296000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,19]],"date-time":"2024-10-19T00:00:00Z","timestamp":1729296000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8499-8_33","type":"book-chapter","created":{"date-parts":[[2024,10,18]],"date-time":"2024-10-18T14:03:02Z","timestamp":1729260182000},"page":"479-493","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["AU-vMAE: Knowledge-Guide Action Units Detection via\u00a0Video Masked Autoencoder"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-7733-4684","authenticated-orcid":false,"given":"Qiaoqiao","family":"Jin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7335-7218","authenticated-orcid":false,"given":"Rui","family":"Shi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8345-8258","authenticated-orcid":false,"given":"Yishun","family":"Dou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5582-577X","authenticated-orcid":false,"given":"Bingbing","family":"Ni","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,10,19]]},"reference":[{"key":"33_CR1","doi-asserted-by":"crossref","unstructured":"Chen, Y., Chen, D., Wang, Y., Wang, T., Liang, Y.: Cafgraph: context-aware facial multi-graph representation for facial action unit recognition. In: MM \u201921: ACM Multimedia Conference, pp. 1029\u20131037. ACM (2021)","DOI":"10.1145\/3474085.3475295"},{"key":"33_CR2","doi-asserted-by":"crossref","unstructured":"Chung, J.S., Nagrani, A., Zisserman, A.: Voxceleb2: deep speaker recognition. In: Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, pp. 1086\u20131090. ISCA (2018)","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"33_CR3","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: A large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. Ieee (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"33_CR4","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N.: An image is worth 16 x 16 words: transformers for image recognition at scale. In: 9th International Conference on Learning Representations, ICLR (2021)"},{"key":"33_CR5","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.B.: Masked autoencoders are scalable vision learners. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, pp. 15979\u201315988 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"33_CR6","unstructured":"Jacob, G.M., Stenger, B.: Facial action unit detection with transformers. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR (2021)"},{"key":"33_CR7","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: 3rd International Conference on Learning Representations, ICLR (2015)"},{"key":"33_CR8","doi-asserted-by":"publisher","unstructured":"Kollias, D.: ABAW: learning from synthetic data & multi-task learning challenges. In: Karlinsky, L., Michaeli, T., Nishino, K. (eds.) Computer Vision \u2013 ECCV 2022 Workshops. ECCV 2022. Lecture Notes in Computer Science, vol. 13806. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-25075-0_12","DOI":"10.1007\/978-3-031-25075-0_12"},{"issue":"11","key":"33_CR9","doi-asserted-by":"publisher","first-page":"2583","DOI":"10.1109\/TPAMI.2018.2791608","volume":"40","author":"W Li","year":"2018","unstructured":"Li, W., Abtahi, F., Zhu, Z., Yin, L.: Eac-net: deep nets with enhancing and cropping for facial action unit detection. IEEE Trans. Pattern Anal. Mach. Intell. 40(11), 2583\u20132596 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"33_CR10","doi-asserted-by":"crossref","unstructured":"Luo, C., Song, S., Xie, W., Shen, L., Gunes, H.: Learning multi-dimensional edge feature-based AU relation graph for facial action unit recognition. In: Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, IJCAI 2022, pp. 1239\u20131246 (2022)","DOI":"10.24963\/ijcai.2022\/173"},{"key":"33_CR11","unstructured":"Ma, B., An, R., Zhang, W., Ding, Y., Zhao, Z., Zhang, R., Lv, T., Fan, C., Hu, Z.: Facial action unit detection and intensity estimation from self-supervised representation (2022). CoRR abs\/2210.15878"},{"key":"33_CR12","doi-asserted-by":"crossref","unstructured":"Mart\u00ednez, B., Valstar, M.F., Jiang, B., Pantic, M.: Automatic analysis of facial actions: a survey 10(3), 325\u2013347 (2019)","DOI":"10.1109\/TAFFC.2017.2731763"},{"key":"33_CR13","doi-asserted-by":"crossref","unstructured":"Mavadati, S.M., Mahoor, M.H., Bartlett, K., Trinh, P., Cohn, J.F.: DISFA: a spontaneous facial action intensity database. IEEE Trans. Affect. Comput., 151\u2013160 (2013)","DOI":"10.1109\/T-AFFC.2013.4"},{"key":"33_CR14","doi-asserted-by":"crossref","unstructured":"Niu, X., Han, H., Yang, S., Huang, Y., Shan, S.: Local relationship learning with person-specific shape regularization for facial action unit detection. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR (2019)","DOI":"10.1109\/CVPR.2019.01219"},{"key":"33_CR15","unstructured":"Rosenberg, E.L., Ekman, P.: What the face reveals: Basic and applied studies of spontaneous expression using the Facial Action Coding System (FACS) (2020)"},{"key":"33_CR16","unstructured":"R\u00f6ssler, A., Cozzolino, D., Verdoliva, L., Riess, C., Thies, J., Nie\u00dfner, M.: Faceforensics: a large-scale video dataset for forgery detection in human faces (2018)"},{"key":"33_CR17","doi-asserted-by":"publisher","unstructured":"Shao, Z., Liu, Z., Cai, J., Ma, L.: Deep adaptive attention for joint facial action unit detection and face alignment. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11217, pp. 725\u2013740. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_43","DOI":"10.1007\/978-3-030-01261-8_43"},{"key":"33_CR18","doi-asserted-by":"crossref","unstructured":"Song, T., Cui, Z., Zheng, W., Ji, Q.: Hybrid message passing with performance-driven structures for facial action unit detection. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6263\u20136272 (2021)","DOI":"10.1109\/CVPR46437.2021.00620"},{"key":"33_CR19","doi-asserted-by":"crossref","unstructured":"Takikawa, T., Evans, A., Tremblay, J., M\u00fcller, T., McGuire, M., Jacobson, A., Fidler, S.: Variable bitrate neural fields. In: SIGGRAPH \u201922: Special Interest Group on Computer Graphics and Interactive Techniques Conference. ACM (2022)","DOI":"10.1145\/3528233.3530727"},{"key":"33_CR20","unstructured":"Tong, Z., Song, Y., Wang, J., Wang, L.: VideoMAE: masked autoencoders are data-efficient learners for self-supervised video pre-training. In: Advances in Neural Information Processing Systems (2022)"},{"key":"33_CR21","doi-asserted-by":"crossref","unstructured":"Valstar, M.F., Pantic, M.: Fully automatic facial action unit detection and temporal analysis. In: IEEE CVPR Workshops 2006, p.\u00a0149. IEEE Computer Society (2006)","DOI":"10.1109\/CVPRW.2006.85"},{"key":"33_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"700","DOI":"10.1007\/978-3-030-58589-1_42","volume-title":"Computer Vision \u2013 ECCV 2020","author":"K Wang","year":"2020","unstructured":"Wang, K., Wu, Q., Song, L., Yang, Z., Wu, W., Qian, C., He, R., Qiao, Yu., Loy, C.C.: MEAD: a large-scale audio-visual dataset for emotional talking-face generation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12366, pp. 700\u2013717. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58589-1_42"},{"key":"33_CR23","doi-asserted-by":"crossref","unstructured":"Wang, L., Tong, Z., Ji, B., Wu, G.: TDN: temporal difference networks for efficient action recognition. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021, pp. 1895\u20131904 (2021)","DOI":"10.1109\/CVPR46437.2021.00193"},{"key":"33_CR24","unstructured":"Wang, Z., Song, S., Luo, C., Zhou, Y., Wu, S., Xie, W., Shen, L.: Spatio-temporal AU relational graph representation learning for facial action units detection (2023). CoRR arXiv:abs\/2303.10644"},{"key":"33_CR25","doi-asserted-by":"crossref","unstructured":"Xie, L., Wang, X., Zhang, H., Dong, C., Shan, Y.: Vfhq: a high-quality dataset and benchmark for video face super-resolution. In: The IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW) (2022)","DOI":"10.1109\/CVPRW56347.2022.00081"},{"key":"33_CR26","doi-asserted-by":"crossref","unstructured":"Yang, H., Yin, L., Zhou, Y., Gu, J.: Exploiting semantic embedding and visual feature for facial action unit detection. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021, pp. 10482\u201310491 (2021)","DOI":"10.1109\/CVPR46437.2021.01034"},{"key":"33_CR27","unstructured":"Yin, Y., Tran, M., Chang, D., Wang, X., Soleymani, M.: Multi-modal facial action unit detection with large pre-trained models for the 5th competition on affective behavior analysis in-the-wild (2023). arXiv:2303.10590"},{"issue":"10","key":"33_CR28","doi-asserted-by":"publisher","first-page":"692","DOI":"10.1016\/j.imavis.2014.06.002","volume":"32","author":"X Zhang","year":"2014","unstructured":"Zhang, X., Yin, L., Cohn, J.F., Canavan, S.J., Reale, M., Horowitz, A., Liu, P., Girard, J.M.: Bp4d-spontaneous: a high-resolution spontaneous 3d dynamic facial expression database. Image Vis. Comput. 32(10), 692\u2013706 (2014)","journal-title":"Image Vis. Comput."},{"key":"33_CR29","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Wang, T., Yin, L.: Region of Interest Based Graph Convolution: A Heatmap Regression Approach for Action Unit Detection, pp. 2890\u20132898 (2020)","DOI":"10.1145\/3394171.3413674"},{"key":"33_CR30","doi-asserted-by":"crossref","unstructured":"Zhao, K., Chu, W., Zhang, H.: Deep region and multi-label learning for facial action unit detection. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016. pp. 3391\u20133399 (2016)","DOI":"10.1109\/CVPR.2016.369"},{"key":"33_CR31","doi-asserted-by":"publisher","unstructured":"Zhu, H., et al.: CelebV-HQ: a large-scale video facial attributes dataset. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022. ECCV 2022. Lecture Notes in Computer Science, vol. 13667. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20071-7_38","DOI":"10.1007\/978-3-031-20071-7_38"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8499-8_33","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,18]],"date-time":"2024-10-18T14:16:09Z","timestamp":1729260969000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8499-8_33"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,19]]},"ISBN":["9789819784981","9789819784998"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8499-8_33","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,19]]},"assertion":[{"value":"19 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}