{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T04:03:20Z","timestamp":1749269000291,"version":"3.41.0"},"publisher-location":"Cham","reference-count":45,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031915772","type":"print"},{"value":"9783031915789","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91578-9_23","type":"book-chapter","created":{"date-parts":[[2025,6,6]],"date-time":"2025-06-06T09:22:49Z","timestamp":1749201769000},"page":"299-309","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MOSAIC: Skeleton-Based Human Motion Recognition with\u00a0Compositional Representations"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3562-3710","authenticated-orcid":false,"given":"Federico Figari","family":"Tomenotti","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6482-4768","authenticated-orcid":false,"given":"Nicoletta","family":"Noceti","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"issue":"3","key":"23_CR1","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1177\/1747021820979518","volume":"74","author":"E Allingham","year":"2021","unstructured":"Allingham, E., Hammerschmidt, D., W\u00f6llner, C.: Time perception in human movement: effects of speed and agency on duration estimation. Q. J. Exp. Psychol. 74(3), 559\u2013572 (2021)","journal-title":"Q. J. Exp. Psychol."},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"van Amsterdam, B., Clarkson, M.J., Stoyanov, D.: Gesture recognition in robotic surgery: a review. IEEE Trans. Biomed. Eng. 68(6) (2021)","DOI":"10.1109\/TBME.2021.3054828"},{"key":"23_CR3","unstructured":"Cao, Z., Hidalgo Martinez, G., Simon, T., Wei, S., Sheikh, Y.A.: Openpose: realtime multi-person 2D pose estimation using part affinity fields. PAMI (2019)"},{"key":"23_CR4","doi-asserted-by":"publisher","unstructured":"Cardoso, D.B., Campos, L.C., Nascimento, E.R.: An action recognition approach with context and multiscale motion awareness. In: 2022 35th SIBGRAPI Conference on Graphics, Patterns and Images (SIBGRAPI), vol.\u00a01, pp. 73\u201378 (2022). https:\/\/doi.org\/10.1109\/SIBGRAPI55357.2022.9991807","DOI":"10.1109\/SIBGRAPI55357.2022.9991807"},{"issue":"8","key":"23_CR5","doi-asserted-by":"publisher","first-page":"3863","DOI":"10.3390\/app12083863","volume":"12","author":"K Cie\u015blik","year":"2022","unstructured":"Cie\u015blik, K., \u0141opatka, M.J.: Research on speed and acceleration of hand movements as command signals for anthropomorphic manipulators as a master-slave system. Appl. Sci. 12(8), 3863 (2022)","journal-title":"Appl. Sci."},{"key":"23_CR6","doi-asserted-by":"crossref","unstructured":"Cui, Y., Jia, M., Lin, T.Y., Song, Y., Belongie, S.: Class-balanced loss based on effective number of samples. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9268\u20139277 (2019)","DOI":"10.1109\/CVPR.2019.00949"},{"key":"23_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1007\/978-3-030-58545-7_5","volume-title":"Computer Vision \u2013 ECCV 2020","author":"S Das","year":"2020","unstructured":"Das, S., Sharma, S., Dai, R., Br\u00e9mond, F., Thonnat, M.: VPN: learning video-pose embedding for activities of daily living. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12354, pp. 72\u201390. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58545-7_5"},{"key":"23_CR8","unstructured":"Duan, H., Wang, J., Chen, K., Lin, D.: DG-STGCN: dynamic spatial-temporal modeling for skeleton-based action recognition. arXiv preprint arXiv:2210.05895 (2022)"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Duan, H., Zhao, Y., Chen, K., Lin, D., Dai, B.: Revisiting skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2969\u20132978 (2022)","DOI":"10.1109\/CVPR52688.2022.00298"},{"key":"23_CR10","doi-asserted-by":"crossref","unstructured":"Duan, K., Bai, S., Xie, L., Qi, H., Huang, Q., Tian, Q.: Centernet: keypoint triplets for object detection. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00667"},{"issue":"6","key":"23_CR11","doi-asserted-by":"publisher","first-page":"650","DOI":"10.1016\/j.conb.2006.10.005","volume":"16","author":"JR Flanagan","year":"2006","unstructured":"Flanagan, J.R., Bowman, M.C., Johansson, R.S.: Control strategies in object manipulation tasks. Curr. Opin. Neurobiol. 16(6), 650\u2013659 (2006)","journal-title":"Curr. Opin. Neurobiol."},{"issue":"7","key":"23_CR12","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0022045","volume":"6","author":"J Gaveau","year":"2011","unstructured":"Gaveau, J., Papaxanthis, C.: The temporal structure of vertical arm movements. PLoS ONE 6(7), e22045 (2011)","journal-title":"PLoS ONE"},{"key":"23_CR13","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1016\/j.patrec.2019.03.016","volume":"137","author":"G Grossi","year":"2020","unstructured":"Grossi, G., Lanzarotti, R., Napoletano, P., Noceti, N., Odone, F.: Positive technology for elderly well-being: a review. Pattern Recogn. Lett. 137, 61\u201370 (2020)","journal-title":"Pattern Recogn. Lett."},{"key":"23_CR14","doi-asserted-by":"crossref","unstructured":"Hauser, K., Bretl, T., Harada, K., Latombe, J.C.: Using motion primitives in probabilistic sample-based planning for humanoid robots. In: Algorithmic Foundation of Robotics VII: Selected Contributions of the Seventh International Workshop on the Algorithmic Foundations of Robotics, pp. 507\u2013522. Springer (2008)","DOI":"10.1007\/978-3-540-68405-3_32"},{"issue":"7","key":"23_CR15","doi-asserted-by":"publisher","first-page":"2114","DOI":"10.1109\/TCSVT.2019.2912988","volume":"30","author":"Y Ji","year":"2019","unstructured":"Ji, Y., Yang, Y., Shen, F., Shen, H.T., Li, X.: A survey of human action analysis in HRI applications. IEEE Trans. Circuits Syst. Video Technol. 30(7), 2114\u20132128 (2019)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"2","key":"23_CR16","doi-asserted-by":"publisher","first-page":"3728","DOI":"10.1109\/LRA.2021.3064149","volume":"6","author":"JD Jones","year":"2021","unstructured":"Jones, J.D., Cortesa, C., Shelton, A., Landau, B., Khudanpur, S., Hager, G.D.: Fine-grained activity recognition for assembly videos. IEEE Robot. Autom. Lett. 6(2), 3728\u20133735 (2021)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"23_CR17","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"issue":"5","key":"23_CR18","doi-asserted-by":"publisher","first-page":"1366","DOI":"10.1007\/s11263-022-01594-9","volume":"130","author":"Y Kong","year":"2022","unstructured":"Kong, Y., Fu, Y.: Human action recognition and prediction: a survey. Int. J. Comput. Vision 130(5), 1366\u20131401 (2022)","journal-title":"Int. J. Comput. Vision"},{"key":"23_CR19","doi-asserted-by":"crossref","unstructured":"Kulic, D., Nakamura, Y.: Scaffolding on-line segmentation of full body human motion patterns. In: 2008 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 2860\u20132866. IEEE (2008)","DOI":"10.1109\/IROS.2008.4650619"},{"issue":"3","key":"23_CR20","doi-asserted-by":"publisher","first-page":"330","DOI":"10.1177\/0278364911426178","volume":"31","author":"D Kuli\u0107","year":"2012","unstructured":"Kuli\u0107, D., Ott, C., Lee, D., Ishikawa, J., Nakamura, Y.: Incremental learning of full body motion primitives and their sequencing through human motion observation. Int. J. Robot. Res. 31(3), 330\u2013345 (2012)","journal-title":"Int. J. Robot. Res."},{"key":"23_CR21","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107356","volume":"104","author":"J Li","year":"2020","unstructured":"Li, J., Xie, X., Pan, Q., Cao, Y., Zhao, Z., Shi, G.: SGM-Net: skeleton-guided multimodal network for action recognition. Pattern Recogn. 104, 107356 (2020)","journal-title":"Pattern Recogn."},{"key":"23_CR22","doi-asserted-by":"crossref","unstructured":"Li, M., Chen, S., Chen, X., Zhang, Y., Wang, Y., Tian, Q.: Actional-structural graph convolutional networks for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3595\u20133603 (2019)","DOI":"10.1109\/CVPR.2019.00371"},{"issue":"10","key":"23_CR23","doi-asserted-by":"publisher","first-page":"2684","DOI":"10.1109\/TPAMI.2019.2916873","volume":"42","author":"J Liu","year":"2019","unstructured":"Liu, J., Shahroudy, A., Perez, M., Wang, G., Duan, L.Y., Kot, A.C.: NTU RGB+D 120: a large-scale benchmark for 3D human activity understanding. IEEE Trans. Pattern Anal. Mach. Intell. 42(10), 2684\u20132701 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"23_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108487","volume":"124","author":"V Mazzia","year":"2022","unstructured":"Mazzia, V., Angarano, S., Salvetti, F., Angelini, F., Chiaberge, M.: Action transformer: a self-attention model for short-time pose-based human action recognition. Pattern Recogn. 124, 108487 (2022)","journal-title":"Pattern Recogn."},{"key":"23_CR25","doi-asserted-by":"crossref","unstructured":"Nair, V., et al.: Action similarity judgment based on kinematic primitives. In: 2020 Joint IEEE 10th International Conference on Development and Learning and Epigenetic Robotics (ICDL-EpiRob), pp.\u00a01\u20138. IEEE (2020)","DOI":"10.1109\/ICDL-EpiRob48136.2020.9278047"},{"issue":"1585","key":"23_CR26","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1098\/rstb.2011.0123","volume":"367","author":"K Pastra","year":"2012","unstructured":"Pastra, K., Aloimonos, Y.: The minimalist grammar of action. Philos. Trans. R. Soc. B Biol. Sci. 367(1585), 103\u2013117 (2012)","journal-title":"Philos. Trans. R. Soc. B Biol. Sci."},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Petrovich, M., Black, M.J., Varol, G.: Action-conditioned 3D human motion synthesis with transformer VAE. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10985\u201310995 (2021)","DOI":"10.1109\/ICCV48922.2021.01080"},{"key":"23_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2021.103219","volume":"208","author":"C Plizzari","year":"2021","unstructured":"Plizzari, C., Cannici, M., Matteucci, M.: Skeleton-based action recognition via spatial and temporal transformer networks. Comput. Vis. Image Underst. 208, 103219 (2021)","journal-title":"Comput. Vis. Image Underst."},{"key":"23_CR29","doi-asserted-by":"crossref","unstructured":"Punnakkal, A.R., Chandrasekaran, A., Athanasiou, N., Quiros-Ramirez, A., Black, M.J.: Babel: bodies, action and behavior with english labels. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 722\u2013731 (2021)","DOI":"10.1109\/CVPR46437.2021.00078"},{"key":"23_CR30","doi-asserted-by":"publisher","first-page":"58","DOI":"10.3389\/frobt.2019.00058","volume":"6","author":"F Rea","year":"2019","unstructured":"Rea, F., Vignolo, A., Sciutti, A., Noceti, N.: Human motion understanding for selecting action timing in collaborative human-robot interaction. Front. Robot. AI 6, 58 (2019)","journal-title":"Front. Robot. AI"},{"key":"23_CR31","doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T.T., Wang, G.: NTU RGB+D: a large scale dataset for 3D human activity analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1010\u20131019 (2016)","DOI":"10.1109\/CVPR.2016.115"},{"key":"23_CR32","doi-asserted-by":"publisher","first-page":"9532","DOI":"10.1109\/TIP.2020.3028207","volume":"29","author":"L Shi","year":"2020","unstructured":"Shi, L., Zhang, Y., Cheng, J., Lu, H.: Skeleton-based action recognition with multi-stream adaptive graph convolutional networks. IEEE Trans. Image Process. 29, 9532\u20139545 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"23_CR33","doi-asserted-by":"crossref","unstructured":"Si, C., Jing, Y., Wang, W., Wang, L., Tan, T.: Skeleton-based action recognition with spatial reasoning and temporal stack learning. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 103\u2013118 (2018)","DOI":"10.1007\/978-3-030-01246-5_7"},{"key":"23_CR34","doi-asserted-by":"crossref","unstructured":"Stapel, J.C.: The development of action perception. In: Modelling Human Motion: From Human Perception to Robot Design, pp. 73\u2013101 (2020)","DOI":"10.1007\/978-3-030-46732-6_5"},{"issue":"6","key":"23_CR35","doi-asserted-by":"publisher","first-page":"1360","DOI":"10.1109\/TRO.2012.2210294","volume":"28","author":"F Stulp","year":"2012","unstructured":"Stulp, F., Theodorou, E.A., Schaal, S.: Reinforcement learning with sequences of motion primitives for robust manipulation. IEEE Trans. Rob. 28(6), 1360\u20131370 (2012)","journal-title":"IEEE Trans. Rob."},{"key":"23_CR36","doi-asserted-by":"crossref","unstructured":"Summers-Stay, D., Teo, C.L., Yang, Y., Ferm\u00fcller, C., Aloimonos, Y.: Using a minimal action grammar for activity understanding in the real world. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 4104\u20134111. IEEE (2012)","DOI":"10.1109\/IROS.2012.6385483"},{"key":"23_CR37","doi-asserted-by":"crossref","unstructured":"Tevet, G., Gordon, B., Hertz, A., Bermano, A.H., Cohen-Or, D.: Motionclip: exposing human motion generation to clip space. In: European Conference on Computer Vision, pp. 358\u2013374. Springer (2022)","DOI":"10.1007\/978-3-031-20047-2_21"},{"key":"23_CR38","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"23_CR39","doi-asserted-by":"crossref","unstructured":"Vignolo, A., Noceti, N., Sciutti, A., Odone, F., Sandini, G.: Learning dictionaries of kinematic primitives for action classification. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 5965\u20135972. IEEE (2021)","DOI":"10.1109\/ICPR48806.2021.9412363"},{"issue":"1","key":"23_CR40","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41598-020-60923-5","volume":"10","author":"F W\u00f6rg\u00f6tter","year":"2020","unstructured":"W\u00f6rg\u00f6tter, F., Ziaeetabar, F., Pfeiffer, S., Kaya, O., Kulvicius, T., Tamosiunaite, M.: Humans predict action using grammar-like structures. Sci. Rep. 10(1), 1\u201311 (2020)","journal-title":"Sci. Rep."},{"key":"23_CR41","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12328"},{"issue":"7","key":"23_CR42","doi-asserted-by":"publisher","first-page":"1635","DOI":"10.1109\/TPAMI.2012.253","volume":"35","author":"Y Yang","year":"2012","unstructured":"Yang, Y., Saleemi, I., Shah, M.: Discovering motion primitives for unsupervised grouping and one-shot learning of human actions, gestures, and expressions. IEEE Trans. Pattern Anal. Mach. Intell. 35(7), 1635\u20131648 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"6","key":"23_CR43","doi-asserted-by":"publisher","first-page":"651","DOI":"10.1038\/88486","volume":"4","author":"JM Zacks","year":"2001","unstructured":"Zacks, J.M., et al.: Human brain activity time-locked to perceptual event boundaries. Nat. Neurosci. 4(6), 651\u2013655 (2001)","journal-title":"Nat. Neurosci."},{"issue":"10","key":"23_CR44","doi-asserted-by":"publisher","first-page":"4501","DOI":"10.1007\/s00371-022-02603-1","volume":"39","author":"J Zhang","year":"2023","unstructured":"Zhang, J., Xie, W., Wang, C., Tu, R., Tu, Z.: Graph-aware transformer for skeleton-based action recognition. Vis. Comput. 39(10), 4501\u20134512 (2023)","journal-title":"Vis. Comput."},{"issue":"8","key":"23_CR45","doi-asserted-by":"publisher","first-page":"3047","DOI":"10.1109\/TNNLS.2019.2935173","volume":"31","author":"X Zhang","year":"2019","unstructured":"Zhang, X., Xu, C., Tian, X., Tao, D.: Graph edge convolutional neural networks for skeleton-based action recognition. IEEE Trans. Neural Netw. Learn. Syst. 31(8), 3047\u20133060 (2019)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91578-9_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,6]],"date-time":"2025-06-06T09:22:58Z","timestamp":1749201778000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91578-9_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031915772","9783031915789"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91578-9_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}