{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T22:32:31Z","timestamp":1743114751859,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":28,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819786190"},{"type":"electronic","value":"9789819786206"}],"license":[{"start":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T00:00:00Z","timestamp":1729382400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T00:00:00Z","timestamp":1729382400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8620-6_8","type":"book-chapter","created":{"date-parts":[[2024,10,19]],"date-time":"2024-10-19T21:02:10Z","timestamp":1729371730000},"page":"114-128","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Multi-modal Human-Centric Contrastive Pre-training with a Pseudo Body-Structured Prior"],"prefix":"10.1007","author":[{"given":"Yihang","family":"Meng","sequence":"first","affiliation":[]},{"given":"Hao","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"Zihua","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Hongyuan","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Xiuxian","family":"Lao","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,20]]},"reference":[{"key":"8_CR1","first-page":"25","volume":"33","author":"JB Alayrac","year":"2020","unstructured":"Alayrac, J.B., Recasens, A., Schneider, R., Arandjelovi\u0107, R., Ramapuram, J., De Fauw, J., Smaira, L., Dieleman, S., Zisserman, A.: Self-supervised multimodal versatile networks. Adv. Neural. Inf. Process. Syst. 33, 25\u201337 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"doi-asserted-by":"crossref","unstructured":"Andriluka, M., Pishchulin, L., Gehler, P., Schiele, B.: 2d human pose estimation: new benchmark and state of the art analysis. In: Proceedings of the IEEE Conference on computer Vision and Pattern Recognition, pp. 3686\u20133693 (2014)","key":"8_CR2","DOI":"10.1109\/CVPR.2014.471"},{"issue":"12","key":"8_CR3","doi-asserted-by":"publisher","first-page":"2799","DOI":"10.1109\/TPAMI.2017.2769085","volume":"40","author":"H Bilen","year":"2017","unstructured":"Bilen, H., Fernando, B., Gavves, E., Vedaldi, A.: Action recognition with dynamic image networks. IEEE Trans. Pattern Anal. Mach. Intell. 40(12), 2799\u20132813 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"doi-asserted-by":"crossref","unstructured":"Chen, D., Wu, L., Chen, Z., Lin, X.: Cthpose: an efficient and effective CNN-transformer hybrid network for human pose estimation. In: Chinese Conference on Pattern Recognition and Computer Vision (PRCV), pp. 327\u2013339. Springer (2023)","key":"8_CR4","DOI":"10.1007\/978-981-99-8469-5_26"},{"doi-asserted-by":"crossref","unstructured":"Cheng, J., Cheng, Q., Yang, M., Liu, Z., Zhang, Q., Cheng, J.: Mixpose: 3d human pose estimation with mixed encoder. In: Chinese Conference on Pattern Recognition and Computer Vision (PRCV), pp. 353\u2013364. Springer (2023)","key":"8_CR5","DOI":"10.1007\/978-981-99-8543-2_29"},{"issue":"4","key":"8_CR6","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-023-2444-y","volume":"18","author":"Z Fu","year":"2024","unstructured":"Fu, Z., Liu, F., Xu, Q., Fu, X., Qi, J.: LMR-CBT: Learning modality-fused representations with CB-transformer for multimodal emotion recognition from unaligned multimodal sequences. Front. Comput. Sci. 18(4), 184314 (2024)","journal-title":"Front. Comput. Sci."},{"doi-asserted-by":"crossref","unstructured":"G\u00fcler, R.A., Neverova, N., Kokkinos, I.: Densepose: dense human pose estimation in the wild. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7297\u20137306 (2018)","key":"8_CR7","DOI":"10.1109\/CVPR.2018.00762"},{"doi-asserted-by":"crossref","unstructured":"Haque, A., Peng, B., Luo, Z., Alahi, A., Yeung, S., Fei-Fei, L.: Towards viewpoint invariant 3d human pose estimation. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14, pp. 160\u2013177. Springer (2016)","key":"8_CR8","DOI":"10.1007\/978-3-319-46448-0_10"},{"doi-asserted-by":"crossref","unstructured":"Hong, F., Pan, L., Cai, Z., Liu, Z.: Versatile multi-modal pre-training for human-centric perception. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16156\u201316166 (2022)","key":"8_CR9","DOI":"10.1109\/CVPR52688.2022.01568"},{"doi-asserted-by":"crossref","unstructured":"Huan, R., Chen, T., Zhan, Z., Chen, P., Liang, R.: Hpan: a hybrid pose attention network for person re-identification. In: Chinese Conference on Pattern Recognition and Computer Vision (PRCV), pp. 198\u2013211. Springer (2023)","key":"8_CR10","DOI":"10.1007\/978-981-99-8555-5_16"},{"doi-asserted-by":"crossref","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3. 6m: Large scale datasets and predictive methods for 3d human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36(7), 1325\u20131339 (2013)","key":"8_CR11","DOI":"10.1109\/TPAMI.2013.248"},{"unstructured":"Liu, H., Liu, T., Chen, Y., Zhang, Z., Li, Y.F.: Ehpe: skeleton cues-based gaussian coordinate encoding for efficient human pose estimation. IEEE Trans. Multimed. (2022)","key":"8_CR12"},{"doi-asserted-by":"crossref","unstructured":"Luo, J., Zhou, L., Zhu, G., Ge, G., Yang, B., Wang, J.: Temporal-channel topology enhanced network for skeleton-based action recognition. In: Chinese Conference on Pattern Recognition and Computer Vision (PRCV), pp. 109\u2013119. Springer (2023)","key":"8_CR13","DOI":"10.1007\/978-981-99-8429-9_9"},{"doi-asserted-by":"crossref","unstructured":"Mai, S., Hu, H., Xing, S.: Modality to modality translation: an adversarial representation learning and graph fusion network for multimodal fusion. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 164\u2013172 (2020)","key":"8_CR14","DOI":"10.1609\/aaai.v34i01.5347"},{"issue":"4","key":"8_CR15","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-022-2050-4","volume":"17","author":"M Maqsood","year":"2023","unstructured":"Maqsood, M., Yasmin, S., Gillani, S., Bukhari, M., Rho, S., Yeo, S.S.: An efficient deep learning-assisted person re-identification solution for intelligent video surveillance in smart cities. Front. Comput. Sci. 17(4), 174329 (2023)","journal-title":"Front. Comput. Sci."},{"doi-asserted-by":"crossref","unstructured":"Patrick, M., Asano, Y.M., Kuznetsova, P., Fong, R., Henriques, J.F., Zweig, G., Vedaldi, A.: On compositions of transformations in contrastive self-supervised learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9577\u20139587 (2021)","key":"8_CR16","DOI":"10.1109\/ICCV48922.2021.00944"},{"doi-asserted-by":"crossref","unstructured":"Qing, Y., Wu, N., Wan, S., Duan, L.: Multi-modal instance refinement for cross-domain action recognition. In: Chinese Conference on Pattern Recognition and Computer Vision (PRCV), pp. 284\u2013296. Springer (2023)","key":"8_CR17","DOI":"10.1007\/978-981-99-8429-9_23"},{"unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning. pp. 8748\u20138763. PMLR (2021)","key":"8_CR18"},{"doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T.T., Wang, G.: Ntu rgb+ d: A large scale dataset for 3d human activity analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1010\u20131019 (2016)","key":"8_CR19","DOI":"10.1109\/CVPR.2016.115"},{"doi-asserted-by":"crossref","unstructured":"Shi, L., Wang, L., Long, C., Zhou, S., Zhou, M., Niu, Z., Hua, G.: Sgcn: sparse graph convolution network for pedestrian trajectory prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8994\u20139003 (2021)","key":"8_CR20","DOI":"10.1109\/CVPR46437.2021.00888"},{"doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","key":"8_CR21","DOI":"10.1109\/CVPR.2019.00584"},{"doi-asserted-by":"crossref","unstructured":"Tan, F., Tang, D., Dou, M., Guo, K., Pandey, R., Keskin, C., Du, R., Sun, D., Bouaziz, S., Fanello, S., et\u00a0al.: Humangps: Geodesic preserving feature for dense human correspondences. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1820\u20131830 (2021)","key":"8_CR22","DOI":"10.1109\/CVPR46437.2021.00186"},{"doi-asserted-by":"crossref","unstructured":"Tian, Y., Krishnan, D., Isola, P.: Contrastive multiview coding. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XI 16, pp. 776\u2013794. Springer (2020)","key":"8_CR23","DOI":"10.1007\/978-3-030-58621-8_45"},{"doi-asserted-by":"crossref","unstructured":"Wang, H., Guo, X., Deng, Z.H., Lu, Y.: Rethinking minimal sufficient representation in contrastive learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16041\u201316050 (2022)","key":"8_CR24","DOI":"10.1109\/CVPR52688.2022.01557"},{"issue":"4","key":"8_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00138-023-01392-4","volume":"34","author":"Y Wei","year":"2023","unstructured":"Wei, Y., Liu, L., Fu, X., Liu, L., Peng, W.: Crowded pose-guided multi-task learning for instance-level human parsing. Mach. Vis. Appl. 34(4), 1\u201315 (2023)","journal-title":"Mach. Vis. Appl."},{"doi-asserted-by":"crossref","unstructured":"Wu, Z., Xiong, Y., Yu, S.X., Lin, D.: Unsupervised feature learning via non-parametric instance discrimination. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3733\u20133742 (2018)","key":"8_CR26","DOI":"10.1109\/CVPR.2018.00393"},{"unstructured":"Yang, D., Li, M.M., Fu, H., Fan, J., Leung, H.: Centrality graph convolutional networks for skeleton-based action recognition. arXiv preprint arXiv:2003.030072 (2020)","key":"8_CR27"},{"issue":"1","key":"8_CR28","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-023-3186-6","volume":"18","author":"Y Yang","year":"2024","unstructured":"Yang, Y., Guo, J., Li, G., Li, L., Li, W., Yang, J.: Alignment efficient image-sentence retrieval considering transferable cross-modal representation learning. Front. Comput. Sci. 18(1), 181335 (2024)","journal-title":"Front. Comput. Sci."}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8620-6_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,14]],"date-time":"2025-01-14T20:17:13Z","timestamp":1736885833000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8620-6_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,20]]},"ISBN":["9789819786190","9789819786206"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8620-6_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,20]]},"assertion":[{"value":"20 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}