{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,19]],"date-time":"2026-01-19T06:39:39Z","timestamp":1768804779818,"version":"3.49.0"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,10,14]],"date-time":"2022-10-14T00:00:00Z","timestamp":1665705600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,10,14]],"date-time":"2022-10-14T00:00:00Z","timestamp":1665705600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2021YFF0307902"],"award-info":[{"award-number":["2021YFF0307902"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2021YFF0307902"],"award-info":[{"award-number":["2021YFF0307902"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key Research and Development Program of Heilongjiang Province, China","award":["GA21C031"],"award-info":[{"award-number":["GA21C031"]}]},{"name":"Key Research and Development Program of Heilongjiang Province, China","award":["GA21C031"],"award-info":[{"award-number":["GA21C031"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1007\/s00530-022-00998-4","type":"journal-article","created":{"date-parts":[[2022,10,14]],"date-time":"2022-10-14T13:03:46Z","timestamp":1665752626000},"page":"615-626","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":21,"title":["Local\u2013Global Transformer Neural Network for temporal action segmentation"],"prefix":"10.1007","volume":"29","author":[{"given":"Xiaoyan","family":"Tian","sequence":"first","affiliation":[]},{"given":"Ye","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Xianglong","family":"Tang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,14]]},"reference":[{"issue":"3","key":"998_CR1","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1007\/s00530-021-00885-4","volume":"28","author":"F Bhering","year":"2022","unstructured":"Bhering, F., Passos, D., Ochi, L.S., et al.: Wireless multipath video transmission: when IoT video applications meet networking\u2014a survey. Multimedia Syst. 28(3), 831\u2013850 (2022)","journal-title":"Multimedia Syst."},{"issue":"4","key":"998_CR2","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1007\/s00530-020-00652-x","volume":"27","author":"H Ullah","year":"2021","unstructured":"Ullah, H., Islam, I.U., Ullah, M., et al.: Multi-feature-based crowd video modeling for visual event detection. Multimedia Syst. 27(4), 589\u2013597 (2021)","journal-title":"Multimedia Syst."},{"issue":"3","key":"998_CR3","doi-asserted-by":"publisher","first-page":"1899","DOI":"10.1007\/s10586-020-03097-z","volume":"23","author":"Y Lu","year":"2020","unstructured":"Lu, Y., An, S.: Research on sports video detection technology motion 3d reconstruction based on hidden markov model. Cluster Comput. 23(3), 1899\u20131909 (2020)","journal-title":"Cluster Comput."},{"issue":"5","key":"998_CR4","doi-asserted-by":"publisher","first-page":"565","DOI":"10.1007\/s00530-017-0561-x","volume":"25","author":"MS Hossain","year":"2019","unstructured":"Hossain, M.S., Muhammad, G., Alamri, A.: Smart healthcare monitoring: a voice pathology detection paradigm for smart cities. Multimedia Syst. 25(5), 565\u2013575 (2019)","journal-title":"Multimedia Syst."},{"key":"998_CR5","doi-asserted-by":"crossref","unstructured":"He, J., Xie, Y., Luan, X., Zhang, L., Zhang, X.: Srn: The movie character relationship analysis via social network. In: 24th International Conference on MultiMedia Modeling (MMM) 10705, 289\u2013301 (2018)","DOI":"10.1007\/978-3-319-73600-6_25"},{"key":"998_CR6","doi-asserted-by":"crossref","unstructured":"Kacprzyk, J., Knyazeva, M., Bozhenyuk, A.: Fuzzy Interval-Valued Temporal Automated Planning and Scheduling Problem. In: International Conference on Theory and Application of Soft Computing, Computing with Words and Perceptions, 51\u201358 (2021)","DOI":"10.1007\/978-3-030-92127-9_11"},{"key":"998_CR7","doi-asserted-by":"crossref","unstructured":"Zhang, H., Liu, D., Xiong, Z.: Ieee Two-stream action recognition-oriented video super-resolution. In: IEEE\/CVF International Conference on Computer Vision (ICCV), 8798\u20138807 (2019)","DOI":"10.1109\/ICCV.2019.00889"},{"key":"998_CR8","doi-asserted-by":"crossref","unstructured":"Singh, B., Marks, T.K., Jones, M., Tuzel, O., Shao, M.: Ieee A multi-stream bi-directional recurrent neural network for fine-grained action detection. In: 29th IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 1961\u20131970 (2016)","DOI":"10.1109\/CVPR.2016.216"},{"issue":"8","key":"998_CR9","doi-asserted-by":"publisher","first-page":"2482","DOI":"10.1109\/TCSVT.2018.2867286","volume":"29","author":"N Xu","year":"2019","unstructured":"Xu, N., Liu, A.-A., Wong, Y., Zhang, Y., Nie, W., Su, Y., Kankanhalli, M.: Dual-stream recurrent neural network for video captioning. IEEE Trans. Circuits Syst. Video Technol. 29(8), 2482\u20132493 (2019)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"998_CR10","doi-asserted-by":"crossref","unstructured":"Yu, T., Li, Y., Li, B.: Rhyrnn: Rhythmic rnn for recognizing events in long and complex videos. 16th European Conference on Computer Vision (ECCV), 127\u2013144.s (2020)","DOI":"10.1007\/978-3-030-58607-2_8"},{"key":"998_CR11","doi-asserted-by":"crossref","unstructured":"Mavroudi, E., Bhaskara, D., Sefati, S., Ali, H., Vidal, R.: Ieee End-to-end fine-grained action segmentation and recognition using conditional random field models and discriminative sparse coding. In: 18th IEEE Winter Conference on Applications of Computer Vision (WACV), 1558\u20131567 (2018)","DOI":"10.1109\/WACV.2018.00174"},{"key":"998_CR12","doi-asserted-by":"crossref","unstructured":"Lea, C., Flynn, M.D., Vidal, R., Reiter, A., Hager, G.D.: Ieee Temporal convolutional networks for action segmentation and detection. In: 30th IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 1003\u20131012 (2017)","DOI":"10.1109\/CVPR.2017.113"},{"key":"998_CR13","doi-asserted-by":"crossref","unstructured":"Abu Farha, Y., Gall, J., Soc, I.C.: Ms-tcn: Multi-stage temporal convolutional network for action segmentation. In: 32nd IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 3570\u20133579 (2019)","DOI":"10.1109\/CVPR.2019.00369"},{"key":"998_CR14","doi-asserted-by":"crossref","unstructured":"Lei, P., Todorovic, S.: Ieee Temporal deformable residual networks for action segmentation in videos. In: 31st IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 6742\u20136751 (2018)","DOI":"10.1109\/CVPR.2018.00705"},{"key":"998_CR15","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., Polosukhin, I.: Attention is all you need. In: 31st Annual Conference on Neural Information Processing Systems (NIPS) 30 (2017)"},{"key":"998_CR16","doi-asserted-by":"crossref","unstructured":"Peng, Z., Huang, W., Gu, S., Xie, L., Wang, Y., Jiao, J., Ye, Q.: Conformer: Local features coupling global representations for visual recognition. In: IEEE\/CVF International Conference on Computer Vision (ICCV), 367\u2013376 (2021)","DOI":"10.1109\/ICCV48922.2021.00042"},{"key":"998_CR17","doi-asserted-by":"crossref","unstructured":"Wan, K., He, B., Zh, W-P., Ieee Tstnn: Two-stage transformer based neural network for speech enhancement in the time domain. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 7098\u20137102 (2021)","DOI":"10.1109\/ICASSP39728.2021.9413740"},{"key":"998_CR18","unstructured":"Beltagy, I., Peters, M.E., Cohan, A.: Longformer: The long-document transformer (2020)"},{"key":"998_CR19","doi-asserted-by":"crossref","unstructured":"Fathi, A., Ren, X., Rehg, J.M.: Ieee Learning to recognize objects in egocentric activities. IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2011)","DOI":"10.1109\/CVPR.2011.5995444"},{"key":"998_CR20","doi-asserted-by":"crossref","unstructured":"Ishikawa, Y., Kasai, S., Aoki, Y., Kataoka, H.: Ieee Alleviating over-segmentation errors by detecting action boundaries. IEEE Winter Conference on Applications of Computer Vision (WACV), 2321\u20132330 (2021)","DOI":"10.1109\/WACV48630.2021.00237"},{"key":"998_CR21","doi-asserted-by":"crossref","unstructured":"Wang, D., Hu, D., Li, X., Dou, D., Assoc Advancement Artificial I.: Temporal relational modeling with self-supervision for action segmentation. In: 35th AAAI Conference on Artificial Intelligence \/ 33rd Conference on Innovative Applications of Artificial Intelligence \/ 11th Symposium on Educational Advances in Artificial Intelligence 35, 2729\u20132737 (2021)","DOI":"10.1609\/aaai.v35i4.16377"},{"key":"998_CR22","doi-asserted-by":"crossref","unstructured":"Stein, S., McKenna, S.J., Assoc Comp M.: Combining embedded accelerometers with computer vision for recognizing food preparation activities. ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp), 729\u2013738 (2013)","DOI":"10.1145\/2493432.2493482"},{"key":"998_CR23","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1016\/j.neucom.2021.04.121","volume":"454","author":"Y Li","year":"2021","unstructured":"Li, Y., Dong, Z., Liu, K., Feng, L., Hu, L., Zhu, J., Xu, L., Wang, Y., Liu, S.: Efficient two-step networks for temporal action segmentation. Neurocomputing 454, 373\u2013381 (2021)","journal-title":"Neurocomputing"},{"key":"998_CR24","doi-asserted-by":"crossref","unstructured":"Li, S-J., Abu Farha, Y., Liu, Y., Cheng, M-M., Gall, J.: Ms-tcn++: Multi-stage temporal convolutional network for action segmentation. IEEE Trans. Pattern Anal. Mach. Intell (2020)","DOI":"10.1109\/TPAMI.2020.3021756"},{"key":"998_CR25","unstructured":"Karaman, S., Seidenari, L., Del Bimbo, A.: Fast saliency based pooling of fisher encoded dense trajectories. ECCV THUMOS Workshop (2014)"},{"key":"998_CR26","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Gall, J., Serre, T.: An end-to-end generative framework for video segmentation and recognition. In: IEEE Winter Conference on Applications of Computer Vision (WACV) (2016)","DOI":"10.1109\/WACV.2016.7477701"},{"key":"998_CR27","doi-asserted-by":"crossref","unstructured":"Vo, N.N., Bobick, A.F.: Ieee From stochastic grammar to bayes network: Probabilistic parsing of complex activity. 27th IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2641\u20132648 (2014)","DOI":"10.1109\/CVPR.2014.338"},{"key":"998_CR28","doi-asserted-by":"crossref","unstructured":"Huang, Y., Sugano, Y., Sato, Y.: Improving action segmentation via graph based temporal reasoning. 33th IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 14024\u201314034. (2020)","DOI":"10.1109\/CVPR42600.2020.01404"},{"key":"998_CR29","doi-asserted-by":"crossref","unstructured":"Wang, Z., Gao, Z., Wang, L., Li, Z., Wu, G.: Boundary-aware cascade networks for temporal action segmentation. In: 16th European Conference on Computer Vision (ECCV), 34\u201351 (2020)","DOI":"10.1007\/978-3-030-58595-2_3"},{"key":"998_CR30","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1016\/j.neucom.2020.03.066","volume":"407","author":"D Wang","year":"2020","unstructured":"Wang, D., Yuan, Y., Wang, Q.: Gated forward refinement network for action segmentation. Neurocomputing 407, 63\u201371 (2020)","journal-title":"Neurocomputing"},{"key":"998_CR31","unstructured":"Singhania, D., Rahaman, R., Yao, A.: Coarse to fine multi-resolution temporal convolutional network. arXiv preprint arXiv:2105.10859 (2021)"},{"key":"998_CR32","doi-asserted-by":"crossref","unstructured":"Ahn, H., Lee, D.: Refining action segmentation with hierarchical video representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), 16302\u201316310 (2021)","DOI":"10.1109\/ICCV48922.2021.01599"},{"key":"998_CR33","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Tang, S., Muandet, K., Jarvers, C., Neumann, H., Soc, I.C.: Local temporal bilinear pooling for fine-grained action parsing. In: 32nd IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 11997\u201312007 (2019)","DOI":"10.1109\/CVPR.2019.01228"},{"key":"998_CR34","unstructured":"Zhang, Y., Muandet, K., Ma, Q., Neumann, H., Tang, S.: Frontal low-rank random tensors for fine-grained action segmentation. arXiv preprint arXiv:1906.01004 (2019)"},{"key":"998_CR35","doi-asserted-by":"publisher","first-page":"107039","DOI":"10.1016\/j.patcog.2019.107039","volume":"98","author":"H Gammulle","year":"2020","unstructured":"Gammulle, H., Denman, S., Sridharan, S., Fookes, C.: Fine-grained action segmentation using the semi-supervised action gan. Pattern Recognit. 98, 107039 (2020)","journal-title":"Pattern Recognit."},{"key":"998_CR36","doi-asserted-by":"crossref","unstructured":"Chen, M., Li, B., Bao, Y., Alregib, G., Kira, Z.: Action segmentation with joint self-supervised temporal domain adaptation. 33th IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 9454\u20139463 (2020)","DOI":"10.1109\/CVPR42600.2020.00947"},{"key":"998_CR37","doi-asserted-by":"crossref","unstructured":"Zeng, Y., Fu, J., Chao, H.: Learning joint spatial-temporal transformations for video inpainting. European Conference on Computer Vision, 528\u2013543 (2020)","DOI":"10.1007\/978-3-030-58517-4_31"},{"key":"998_CR38","doi-asserted-by":"crossref","unstructured":"Dai, Z., Cai, B., Lin. Y., Chen, J., Ieee Comp S O C Up-detr: Unsupervised pre-training for object detection with transformers. 34th IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 1601\u20131610 (2021)","DOI":"10.1109\/CVPR46437.2021.00165"},{"key":"998_CR39","unstructured":"Yi, F., Wen, H., Jiang, T.: Asformer: Transformer for action segmentation (2021)"},{"key":"998_CR40","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Ieee Quo vadis, action recognition? A new model and the kinetics dataset. 30th IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 4724\u20134733 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"998_CR41","doi-asserted-by":"publisher","DOI":"10.1145\/3530811","author":"Y Tay","year":"2020","unstructured":"Tay, Y., Dehghani, M., Bahri, D., et al.: Efficient transformers: a survey. ACM Comput. Surv (CSUR) (2020). https:\/\/doi.org\/10.1145\/3530811","journal-title":"ACM Comput. Surv (CSUR)"},{"key":"998_CR42","doi-asserted-by":"crossref","unstructured":"Lin, T., Zhao, X., Su, H., Wang, C., Yang, M.: Bsn: Boundary sensitive network for temporal action proposal generation. In: 15th European Conference on Computer Vision (ECCV) 11208, 3\u201321 (2018)","DOI":"10.1007\/978-3-030-01225-0_1"},{"key":"998_CR43","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Arslan, A., Serre, T.: Ieee The language of actions: Recovering the syntax and semantics of goal-directed human activities. 27th IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 780\u2013787 (2014)","DOI":"10.1109\/CVPR.2014.105"},{"issue":"3","key":"998_CR44","first-page":"61","volume":"10","author":"J Platt","year":"1999","unstructured":"Platt, J.: Probabilistic outputs for support vector machines and comparisons to regularized likelihood methods. Adv. Large Margin Classifiers. 10(3), 61\u201374 (1999)","journal-title":"Adv. Large Margin Classifiers."},{"key":"998_CR45","unstructured":"Guo C, Pleiss G, Sun Y, et al (2017) On calibration of modern neural networks. International Conference on Machine Learning, 1321\u20131330. PMLR."},{"key":"998_CR46","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, et al (2021) Swin transformer: Hierarchical vision transformer using shifted windows. IEEE\/CVF International Conference on Computer Vision (ICCV), 10012\u201310022.","DOI":"10.1109\/ICCV48922.2021.00986"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-022-00998-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-022-00998-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-022-00998-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,27]],"date-time":"2023-02-27T19:07:50Z","timestamp":1677524870000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-022-00998-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,14]]},"references-count":46,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,4]]}},"alternative-id":["998"],"URL":"https:\/\/doi.org\/10.1007\/s00530-022-00998-4","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,14]]},"assertion":[{"value":"23 June 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 August 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 October 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"We declare that we have no known competing financial interests or personal relationships that have influenced the work reported in this manuscript.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}