{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T05:19:53Z","timestamp":1743139193651,"version":"3.40.3"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031646041"},{"type":"electronic","value":"9783031646058"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-64605-8_12","type":"book-chapter","created":{"date-parts":[[2024,7,1]],"date-time":"2024-07-01T05:01:59Z","timestamp":1719810119000},"page":"165-179","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi Modal Aware Transformer Network for\u00a0Effective Daily Life Human Action Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2632-8544","authenticated-orcid":false,"given":"Hend","family":"Basly","sequence":"first","affiliation":[]},{"given":"Mohamed Amine","family":"Zayene","sequence":"additional","affiliation":[]},{"given":"Fatma Ezahra","family":"Sayadi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,7,2]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Doll\u00e1r, P., Tu, Z., He, K.: Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1492\u20131500 (2017)","DOI":"10.1109\/CVPR.2017.634"},{"key":"12_CR4","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. Adv. Neural Inf. Process. Syst. 27 (2014)"},{"key":"12_CR5","unstructured":"Cheron, G., Laptev, I., Schmid, C.: P-CNN: pose-based CNN features for action recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 218\u20133226 (2017)"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Ercolano, G., Riccio, D., Rossi, S.: Two deep approaches for ADL recognition: a multi-scale LSTM and a CNN-LSTM with a 3D matrix skeleton representation. In: 26th IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN), pp. 877\u2013882 (2017)","DOI":"10.1109\/ROMAN.2017.8172406"},{"key":"12_CR7","unstructured":"Abu-El-Haija, S., et al.: YouTube-8M: a large-scale video classification benchmark. arXiv preprint arXiv:1609.08675 (2016)"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Das, S., Koperski, M., Bremond, F., Francesca, G.: Action recognition based on a mixture of RGB and depth-based skeletons In: 2017 14th IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS), pp. 1\u20136 IEEE (2017)","DOI":"10.1109\/AVSS.2017.8078548"},{"key":"12_CR9","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. arXiv:1609.02907"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Tang, Y., Tian, Y., Lu, J., Li, P., Zhou, J.: Deep progressive reinforcement learning for skeleton-based action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5323\u20135332 (2018)","DOI":"10.1109\/CVPR.2018.00558"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Shi, L., Zhang, Y., Cheng, J., Lu, H.: Skeleton-based action recognition with directed graph neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7912\u20137921 (2019)","DOI":"10.1109\/CVPR.2019.00810"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: Thirty-second AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Song, S., Lan, C., Xing, J., Zeng, W., Liu, J.: An End-to-End Spatio-temporal attention model for human action recognition from skeleton data. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 31 (2017)","DOI":"10.1609\/aaai.v31i1.11212"},{"key":"12_CR14","unstructured":"Sharma, S., Kiros, R., Salakhutdinov, R.: Action recognition using visual attention. arXiv:1511.04119 (2015)"},{"key":"12_CR15","unstructured":"Girdhar, R., Ramanan, D.: Attentional pooling for action recognition. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"12_CR16","doi-asserted-by":"crossref","unstructured":"Long, X., Gan, C., De Melo, G., Wu, J., Liu, X., Wen, S.: Attention clusters: Purely attention based local feature integration for video classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7834\u20137843 (2018)","DOI":"10.1109\/CVPR.2018.00817"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Baradel, F., Wolf, C., Mille, J., Taylor, G.W.: Glimpse clouds: human activity recognition from unstructured feature points. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 469\u2013478 (2018)","DOI":"10.1109\/CVPR.2018.00056"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Zheng, Z., An, G., Wu, D., Ruan, Q.: Global and local knowledge-aware attention network for action recognition. IEEE Trans. Neural Netw. Learn. Syst. 32(1), 334\u2013347 (2020)","DOI":"10.1109\/TNNLS.2020.2978613"},{"key":"12_CR19","doi-asserted-by":"crossref","unstructured":"Guo, M.H., Liu, Z.N., Mu, T.J., Hu, S.M.: Beyond self-attention: External attention using two linear layers for visual tasks. arXiv preprint arXiv:2105.02358 (2021)","DOI":"10.1109\/TPAMI.2022.3211006"},{"key":"12_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1007\/978-3-030-68796-0_50","volume-title":"Pattern Recognition. ICPR International Workshops and Challenges","author":"C Plizzari","year":"2021","unstructured":"Plizzari, C., Cannici, M., Matteucci, M.: Spatial temporal transformer network for skeleton-based action recognition. In: Del Bimbo, A., et al. (eds.) ICPR 2021. LNCS, vol. 12663, pp. 694\u2013701. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-68796-0_50"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Z., Zhang, H., Chen, Z., Wang, Z., Ouyang, W.: Disentangling and unifying graph convolutions for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 143\u2013152 (2020)","DOI":"10.1109\/CVPR42600.2020.00022"},{"key":"12_CR22","unstructured":"Joze, H.R.V., Shaban, A., Iuzzolino, M.L., Koishida, K.: MMTM: multimodal transfer module for CNN fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13289\u201313299 (2020)"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Baradel, F., Wolf, C., Mille, J.: Human action recognition: pose based attention draws focus to hands. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 604\u2013613 (2017)","DOI":"10.1109\/ICCVW.2017.77"},{"key":"12_CR24","unstructured":"Baradel, F., Wolf, C., Mille, J.: Human activity recognition with pose-driven attention to RGB. In: BMVC 2018-29th British Machine Vision Conference, pp. 1\u201314 (2018)"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Liu, G., Qian, J., Wen, F., Zhu, X., Ying, R., Liu, P.: Action recognition based on 3D skeleton and RGB frame fusion. In: 2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 258\u2013264, IEEE (2019)","DOI":"10.1109\/IROS40897.2019.8967570"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Baradel, C., Wolf, F., Mille, J., Taylor, G.W.: Glimpse clouds: human activity recognition from unstructured feature points. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 469\u2013478 (2018)","DOI":"10.1109\/CVPR.2018.00056"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Shi, L., Zhang, Y., Cheng, J., Lu, H.: Two-stream adaptive graph convolutional networks for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12026\u201312035, IEEE (2019)","DOI":"10.1109\/CVPR.2019.01230"},{"key":"12_CR28","unstructured":"Shi, F., et al.: Star: sparse transformer-based action recognition. arXiv:2107.07089 (2021)"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Li, C., Zhong, Q., Xie, D., Pu, S.: Co-occurrence feature learning from skeleton data for action recognition and detection with hierarchical aggregation. arXiv:1804.06055 (2018)","DOI":"10.24963\/ijcai.2018\/109"},{"key":"12_CR30","doi-asserted-by":"crossref","unstructured":"Li, M., Chen, S., Chen, X., Zhang, Y., Wang, Y., Tian, Q.: Actional structural graph convolutional networks for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3595\u20133603, IEEE (2019)","DOI":"10.1109\/CVPR.2019.00371"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Cho, S., Maqbool, M., Liu, F., Foroosh, H.: Self-attention network for skeleton-based human action recognition. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 635\u2013644, IEEE (2020)","DOI":"10.1109\/WACV45572.2020.9093639"},{"issue":"16","key":"12_CR32","doi-asserted-by":"publisher","first-page":"5339","DOI":"10.3390\/s21165339","volume":"21","author":"Y Sun","year":"2021","unstructured":"Sun, Y., Shen, Y., Ma, L.: Msst-rt: multi-stream spatial-temporal relative transformer for skeleton-based action recognition. Sensors 21(16), 5339 (2021)","journal-title":"Sensors"},{"key":"12_CR33","doi-asserted-by":"publisher","first-page":"228108","DOI":"10.1109\/ACCESS.2020.3046142","volume":"8","author":"Z Zhang","year":"2020","unstructured":"Zhang, Z., Wang, Z., Zhuang, S., Huang, F.: Structure-feature fusion adaptive graph convolutional networks for skeleton-based action recognition. IEEE Access 8, 228108\u2013228117 (2020)","journal-title":"IEEE Access"},{"key":"12_CR34","doi-asserted-by":"crossref","unstructured":"Liu, M., Yuan, J.: Recognizing human actions as the evolution of pose estimation maps. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1159\u20131168 (2018)","DOI":"10.1109\/CVPR.2018.00127"},{"key":"12_CR35","doi-asserted-by":"crossref","unstructured":"Das, S., et al.: Toyota smarthome: real-world activities of daily living. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 833\u2013842, IEEE (2019)","DOI":"10.1109\/ICCV.2019.00092"},{"key":"12_CR36","doi-asserted-by":"publisher","first-page":"103892","DOI":"10.1016\/j.jvcir.2023.103892","volume":"95","author":"T Ahmad","year":"2023","unstructured":"Ahmad, T., Rizvi, S.T.H., Kanwal, N.: Transforming spatio-temporal self-attention using action embedding for skeleton-based action recognition. J. Vis. Commun. Image Represent. 95, 103892 (2023)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"12_CR37","doi-asserted-by":"publisher","unstructured":"Guddeti, R.M.R.: Human action recognition using multi-stream attention-based deep networks with heterogeneous data from overlapping sub-actions. Neural Comput. Appl. 1\u201317 (2024). https:\/\/doi.org\/10.1007\/s00521-024-09630-0","DOI":"10.1007\/s00521-024-09630-0"}],"container-title":["Lecture Notes in Computer Science","Computational Science and Its Applications \u2013 ICCSA 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-64605-8_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,1]],"date-time":"2024-07-01T05:03:50Z","timestamp":1719810230000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-64605-8_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031646041","9783031646058"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-64605-8_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"2 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICCSA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Science and Its Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iccsa2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}