{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T06:21:51Z","timestamp":1779344511014,"version":"3.51.4"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2023,10,17]],"date-time":"2023-10-17T00:00:00Z","timestamp":1697500800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,17]],"date-time":"2023-10-17T00:00:00Z","timestamp":1697500800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Natural Science Foundation of China","award":["51935005"],"award-info":[{"award-number":["51935005"]}]},{"name":"Basic Scientific Research Project","award":["JCKY20200603C010"],"award-info":[{"award-number":["JCKY20200603C010"]}]},{"DOI":"10.13039\/501100005046","name":"Natural Science Foundation of Heilongjiang Province of China","doi-asserted-by":"crossref","award":["LH2021F023"],"award-info":[{"award-number":["LH2021F023"]}],"id":[{"id":"10.13039\/501100005046","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Science & Technology Planned Project of Heilongjiang Province of China","award":["GA21C031"],"award-info":[{"award-number":["GA21C031"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-17276-8","type":"journal-article","created":{"date-parts":[[2023,10,17]],"date-time":"2023-10-17T06:02:45Z","timestamp":1697522565000},"page":"44273-44297","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Spatial-temporal graph transformer network for skeleton-based temporal action 
segmentation"],"prefix":"10.1007","volume":"83","author":[{"given":"Xiaoyan","family":"Tian","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ye","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peng","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xianglong","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,10,17]]},"reference":[{"key":"17276_CR1","doi-asserted-by":"crossref","unstructured":"Zhang Z, Wang W, Tian X (2023) Semantic segmentation of metal surface defects and corresponding strategies. IEEE Trans Instrum Meas 72:1\u201313","DOI":"10.1109\/TIM.2023.3282301"},{"issue":"1","key":"17276_CR2","doi-asserted-by":"publisher","first-page":"8699","DOI":"10.1038\/s41598-023-36015-5","volume":"13","author":"L Casini","year":"2023","unstructured":"Casini L, Marchetti N, Montanucci A et al (2023) A human-AI collaboration workflow for archaeological sites detection. Sci Rep 13(1):8699","journal-title":"Sci Rep"},{"key":"17276_CR3","doi-asserted-by":"publisher","first-page":"4623","DOI":"10.1007\/s11042-018-6423-5","volume":"78","author":"F Kong","year":"2019","unstructured":"Kong F, Wang Y (2019) Multimodal interface interaction design model based on dynamic augmented reality. Multimedia Tools Appl 78:4623\u20134653","journal-title":"Multimedia Tools Appl"},{"key":"17276_CR4","unstructured":"Ding G, Sener F, Yao A (2022) Temporal action segmentation: an analysis of modern technique. 
arXiv:2210.10352"},{"key":"17276_CR5","doi-asserted-by":"publisher","first-page":"2907","DOI":"10.1007\/s11042-020-09741-5","volume":"80","author":"M Rashmi","year":"2021","unstructured":"Rashmi M, Ashwin TS, Guddeti RMR (2021) Surveillance video analysis for student action recognition and localization inside computer laboratories of a smart campus. Multimedia Tools Appl 80:2907\u20132929","journal-title":"Multimedia Tools Appl"},{"issue":"5","key":"17276_CR6","doi-asserted-by":"publisher","first-page":"7439","DOI":"10.1007\/s11042-022-12000-4","volume":"81","author":"MF Tsai","year":"2022","unstructured":"Tsai MF, Huang SH (2022) Enhancing accuracy of human action recognition system using skeleton point correction method. Multimedia Tools Appl 81(5):7439\u20137459","journal-title":"Multimedia Tools Appl"},{"key":"17276_CR7","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A (2017) Quo vadis, action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 6299\u20136308","DOI":"10.1109\/CVPR.2017.502"},{"key":"17276_CR8","unstructured":"Soomro K, Zamir A R, Shah M (2012) UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv:1212.0402"},{"key":"17276_CR9","doi-asserted-by":"crossref","unstructured":"Lea C, Flynn M D, Vidal R et al (2017) Temporal convolutional networks for action segmentation and detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 156\u2013165","DOI":"10.1109\/CVPR.2017.113"},{"key":"17276_CR10","doi-asserted-by":"crossref","unstructured":"Kuehne H, Gall J, Serre T (2016) An end-to-end generative framework for video segmentation and recognition. In: 2016 IEEE winter conference on applications of computer vision (WACV). 
IEEE, pp 1\u20138","DOI":"10.1109\/WACV.2016.7477701"},{"key":"17276_CR11","doi-asserted-by":"crossref","unstructured":"Farha Y A, Gall J (2019) Ms-tcn: Multi-stage temporal convolutional network for action segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 3575\u20133584","DOI":"10.1109\/CVPR.2019.00369"},{"key":"17276_CR12","doi-asserted-by":"crossref","unstructured":"Li SJ, AbuFarha Y, Liu Y et al (2020) Ms-tcn++: multi-stage temporal convolutional network for action segmentation. IEEE Trans Pattern Anal Mach Intell 45:6647\u20136658","DOI":"10.1109\/TPAMI.2020.3021756"},{"key":"17276_CR13","doi-asserted-by":"crossref","unstructured":"Ishikawa Y, Kasai S, Aoki Y, et al (2021) Alleviating over-segmentation errors by detecting action boundaries. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision. pp 2322\u20132331","DOI":"10.1109\/WACV48630.2021.00237"},{"key":"17276_CR14","doi-asserted-by":"crossref","unstructured":"Wang Z, Gao Z, Wang L, et al (2020) Boundary-aware cascade networks for temporal action segmentation. In: Computer vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part XXV 16. Springer International Publishing, pp 34\u201351","DOI":"10.1007\/978-3-030-58595-2_3"},{"issue":"3","key":"17276_CR15","doi-asserted-by":"publisher","first-page":"2738","DOI":"10.1007\/s10489-022-03569-2","volume":"53","author":"D Yang","year":"2023","unstructured":"Yang D, Cao Z, Mao L et al (2023) A temporal and channel-combined attention block for action segmentation. Appl Intell 53(3):2738\u20132750","journal-title":"Appl Intell"},{"key":"17276_CR16","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1016\/j.neucom.2021.04.121","volume":"454","author":"Y Li","year":"2021","unstructured":"Li Y, Dong Z, Liu K et al (2021) Efficient two-step networks for temporal action segmentation. 
Neurocomputing 454:373\u2013381","journal-title":"Neurocomputing"},{"key":"17276_CR17","unstructured":"Yi F, Wen H, Jiang T (2021) Asformer: Transformer for action segmentation. arXiv:2110.08568"},{"key":"17276_CR18","doi-asserted-by":"publisher","first-page":"104567","DOI":"10.1016\/j.imavis.2022.104567","volume":"128","author":"N Aziere","year":"2022","unstructured":"Aziere N, Todorovic S (2022) Multistage temporal convolution transformer for action segmentation. Image Vis Comput 128:104567","journal-title":"Image Vis Comput"},{"issue":"2","key":"17276_CR19","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1007\/s00530-022-00998-4","volume":"29","author":"X Tian","year":"2023","unstructured":"Tian X, Jin Y, Tang X (2023) Local-global transformer neural network for temporal action segmentation. Multimedia Syst 29(2):615\u2013626","journal-title":"Multimedia Syst"},{"key":"17276_CR20","doi-asserted-by":"crossref","unstructured":"Tian X, Jin Y, Tang X (2023) TSRN: two-stage refinement network for temporal action segmentation. Pattern Anal Appl 26:1375\u20131393","DOI":"10.1007\/s10044-023-01166-8"},{"key":"17276_CR21","unstructured":"Singhania D, Rahaman R, Yao A (2021) Coarse to fine multi-resolution temporal convolutional network. arXiv:2105.10859"},{"key":"17276_CR22","doi-asserted-by":"publisher","first-page":"108764","DOI":"10.1016\/j.patcog.2022.108764","volume":"129","author":"J Park","year":"2022","unstructured":"Park J, Kim D, Huh S et al (2022) Maximization and restoration: Action segmentation through dilation passing and temporal reconstruction. Pattern Recogn 129:108764","journal-title":"Pattern Recogn"},{"key":"17276_CR23","unstructured":"Du D, Su B, Li Y, et al (2022) Efficient U-transformer with boundary-aware loss for action segmentation. arXiv:2205.13425"},{"key":"17276_CR24","unstructured":"Kipf T N, Welling M (2016) Semi-supervised classification with graph convolutional networks. 
arXiv:1609.02907"},{"key":"17276_CR25","doi-asserted-by":"crossref","unstructured":"Shi L, Zhang Y, Cheng J et al (2019) Two-stream adaptive graph convolutional networks for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 12026\u201312035","DOI":"10.1109\/CVPR.2019.01230"},{"key":"17276_CR26","doi-asserted-by":"crossref","unstructured":"Plizzari C, Cannici M, Matteucci M (2021) Spatial temporal transformer network for skeleton-based action recognition. Pattern Recognition. In: ICPR international workshops and challenges: virtual event, January 10-15, 2021, Proceedings, Part III. Springer International Publishing, pp 694\u2013701","DOI":"10.1007\/978-3-030-68796-0_50"},{"key":"17276_CR27","doi-asserted-by":"crossref","unstructured":"Shi L, Zhang Y, Cheng J et al (2020) Decoupled spatial-temporal attention network for skeleton-based action-gesture recognition. In: Proceedings of the Asian conference on computer vision","DOI":"10.1007\/978-3-030-69541-5_3"},{"key":"17276_CR28","doi-asserted-by":"publisher","unstructured":"Filtjens B, Vanrumste B, Slaets P (2022) Skeleton-based action segmentation with multi-stage spatial-temporal graph convolutional neural networks. IEEE Trans Emerg Top Comput https:\/\/doi.org\/10.1109\/TETC.2022.3230912","DOI":"10.1109\/TETC.2022.3230912"},{"key":"17276_CR29","doi-asserted-by":"publisher","first-page":"103707","DOI":"10.1016\/j.cviu.2023.103707","volume":"232","author":"L Xu","year":"2023","unstructured":"Xu L, Wang Q, Lin X et al (2023) An efficient framework for few-shot skeleton-based temporal action segmentation. Comput Vis Image Underst 232:103707","journal-title":"Comput Vis Image Underst"},{"key":"17276_CR30","doi-asserted-by":"publisher","first-page":"1883","DOI":"10.1109\/LSP.2022.3199670","volume":"29","author":"K Liu","year":"2022","unstructured":"Liu K, Li Y, Xu Y et al (2022) Spatial focus attention for fine-grained skeleton-based action task. 
IEEE Signal Process Lett 29:1883\u20131887","journal-title":"IEEE Signal Process Lett"},{"issue":"7","key":"17276_CR31","doi-asserted-by":"publisher","first-page":"5935","DOI":"10.1109\/TCYB.2021.3064092","volume":"52","author":"J Chen","year":"2021","unstructured":"Chen J, Zhong M, Li J et al (2021) Effective deep attributed network representation learning with topology adapted smoothing. IEEE Trans Cybern 52(7):5935\u20135946","journal-title":"IEEE Trans Cybern"},{"key":"17276_CR32","doi-asserted-by":"publisher","first-page":"108146","DOI":"10.1016\/j.knosys.2022.108146","volume":"240","author":"J Chen","year":"2022","unstructured":"Chen J, Zhong M, Li J, Liu Y, Zhang H, Xu D et al (2022) Graph transformer network with temporal kernel attention for skeleton-based action recognition. Knowl-Based Syst 240:108146","journal-title":"Knowl-Based Syst"},{"key":"17276_CR33","doi-asserted-by":"crossref","unstructured":"Liu Z, Zhang H, Chen Z et al (2020) Disentangling and unifying graph convolutions for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 143\u2013152","DOI":"10.1109\/CVPR42600.2020.00022"},{"key":"17276_CR34","doi-asserted-by":"crossref","unstructured":"Niemann F, Reining C, Moya Rueda F et al (2020) Lara: Creating a dataset for human activity recognition in logistics using semantic attributes. Sensors 20(15):4083","DOI":"10.3390\/s20154083"},{"key":"17276_CR35","doi-asserted-by":"publisher","unstructured":"Yan S, Xiong Y, Lin D (2018) Spatial temporal graph convolutional networks for skeleton-based action recognition. Proc AAAI Conf Artif Intell 32(1). https:\/\/doi.org\/10.1609\/aaai.v32i1.12328","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"17276_CR36","doi-asserted-by":"crossref","unstructured":"Si C, Chen W, Wang W et al (2019) An attention enhanced graph convolutional lstm network for skeleton-based action recognition. 
In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp 1227\u20131236","DOI":"10.1109\/CVPR.2019.00132"},{"key":"17276_CR37","doi-asserted-by":"crossref","unstructured":"Li C, Zhong Q, Xie D et al (2018) Co-occurrence feature learning from skeleton data for action recognition and detection with hierarchical aggregation. arXiv:1804.06055","DOI":"10.24963\/ijcai.2018\/109"},{"key":"17276_CR38","doi-asserted-by":"crossref","unstructured":"Caetano C, Sena J, Br\u00e9mond F et al (2019) Skelemotion: A new representation of skeleton joint sequences based on motion information for 3d action recognition. 2019 16th IEEE international conference on advanced video and signal based surveillance (AVSS). IEEE, pp 1\u20138","DOI":"10.1109\/AVSS.2019.8909840"},{"key":"17276_CR39","doi-asserted-by":"crossref","unstructured":"Li H, Zhang Z, Zhao X et al (2022) Enhancing multi-modal features using local self-attention for 3D object detection. Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part X. Springer Nature Switzerland, Cham, pp 532\u2013549","DOI":"10.1007\/978-3-031-20080-9_31"},{"key":"17276_CR40","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1016\/j.patrec.2023.03.003","volume":"168","author":"W Li","year":"2023","unstructured":"Li W, Huang L (2023) YOLOSA: Object detection based on 2D local feature superimposed self-attention. Pattern Recogn Lett 168:86\u201392","journal-title":"Pattern Recogn Lett"},{"key":"17276_CR41","doi-asserted-by":"crossref","unstructured":"Ribeiro L F R, Saverese P H P, Figueiredo D R (2017) struc2vec: Learning node representations from structural identity. In: Proceedings of the 23rd ACM SIGKDD international conference on knowledge discovery and data mining. pp 385\u2013394","DOI":"10.1145\/3097983.3098061"},{"key":"17276_CR42","unstructured":"Vaswani A, Shazeer N, Parmar N et al (2017) Attention is all you need. 
Adv Neural Inf Processing Syst 30:1\u201311"},{"key":"17276_CR43","doi-asserted-by":"crossref","unstructured":"Liu C, Hu Y, Li Y et al (2017) PKU-MMD: A large scale benchmark for skeleton-based human action understanding. In: Proceedings of the workshop on visual analysis in smart and connected communities. pp 1\u20138","DOI":"10.1145\/3132734.3132739"},{"key":"17276_CR44","doi-asserted-by":"crossref","unstructured":"Chereshnev R, Kert\u00e9sz-Farkas A (2018) Hugadb: Human gait database for activity recognition from wearable inertial sensor networks. Analysis of images, social networks and texts: 6th international conference, AIST 2017, Moscow, Russia, July 27-29, 2017, Revised Selected Papers 6. Springer International Publishing, pp 131\u2013141","DOI":"10.1007\/978-3-319-73013-4_12"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17276-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-17276-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17276-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T23:14:27Z","timestamp":1730330067000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-17276-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,17]]},"references-count":44,"journal-issue":{"issue":"15","published-online":{"date-parts":[[2024,5]]}},"alternative-id":["17276"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-17276-8","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject"
:[],"published":{"date-parts":[[2023,10,17]]},"assertion":[{"value":"14 June 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 August 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 September 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 October 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}]}}