{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T12:52:13Z","timestamp":1761396733457},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,2,21]],"date-time":"2024-02-21T00:00:00Z","timestamp":1708473600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,21]],"date-time":"2024-02-21T00:00:00Z","timestamp":1708473600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1007\/s00530-024-01262-7","type":"journal-article","created":{"date-parts":[[2024,2,21]],"date-time":"2024-02-21T19:02:31Z","timestamp":1708542151000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["A dual-branch hybrid network of CNN and transformer with adaptive keyframe scheduling for video semantic segmentation"],"prefix":"10.1007","volume":"30","author":[{"given":"Zhixue","family":"Liang","sequence":"first","affiliation":[]},{"given":"Wenyong","family":"Dong","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,21]]},"reference":[{"key":"1262_CR1","doi-asserted-by":"crossref","unstructured":"Li, Y., Shi, J., Lin, D.: Low-latency video semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5997\u20136005 (2018)","DOI":"10.1109\/CVPR.2018.00628"},{"key":"1262_CR2","doi-asserted-by":"crossref","unstructured":"Hu, P., Caba, F., Wang, O., Lin, Z., Sclaroff, S., Perazzi, F.: Temporally distributed networks for fast video semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8818\u20138827 (2020)","DOI":"10.1109\/CVPR42600.2020.00884"},{"key":"1262_CR3","doi-asserted-by":"crossref","unstructured":"Strudel, R., Garcia, R., Laptev, I., Schmid, C.: Segmenter: Transformer for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7262\u20137272 (2021)","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"1262_CR4","doi-asserted-by":"crossref","unstructured":"Wang, H., Wang, W., Liu, J.: Temporal memory attention for video semantic segmentation. In: 2021 IEEE International Conference on Image Processing (ICIP), pp. 2254\u20132258 (2021). IEEE","DOI":"10.1109\/ICIP42928.2021.9506731"},{"key":"1262_CR5","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/j.patrec.2021.04.024","volume":"148","author":"Y Jin","year":"2021","unstructured":"Jin, Y., Han, D., Ko, H.: Trseg: transformer for semantic segmentation. Pattern Recognit. Lett. 148, 29\u201335 (2021)","journal-title":"Pattern Recognit. Lett."},{"issue":"1","key":"1262_CR6","first-page":"31","volume":"61","author":"M Lazarevi\u0107","year":"2011","unstructured":"Lazarevi\u0107, M.: Stability and stabilization of fractional order time delay systems. Sci. Tech. Rev. 61(1), 31\u201345 (2011)","journal-title":"Sci. Tech. Rev."},{"key":"1262_CR7","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: Segformer: simple and efficient design for semantic segmentation with transformers. Adv. Neural Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1262_CR8","doi-asserted-by":"crossref","unstructured":"Zheng, S., Lu, J., Zhao, H., Zhu, X., Luo, Z., Wang, Y., Fu, Y., Feng, J., Xiang, T., Torr, P.H., et al.: Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6881\u20136890 (2021)","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"1262_CR9","unstructured":"Wu, S., Wu, T., Lin, F., Tian, S., Guo, G.: Fully transformer networks for semantic image segmentation. arXiv preprint arXiv:2106.04108 (2021)"},{"key":"1262_CR10","doi-asserted-by":"crossref","unstructured":"Duan, Z., Huang, X., Ma, J.: Transformer-based cross-modal information fusion network for semantic segmentation. Neural Process.  Lett. 1\u201315 (2023)","DOI":"10.1007\/s11063-022-11142-8"},{"key":"1262_CR11","unstructured":"Qin, Z., Liu, J., Zhang, X., Tian, M., Zhou, A., Yi, S., Li, H.: Pyramid fusion transformer for semantic segmentation. arXiv preprint arXiv:2201.04019 (2022)"},{"key":"1262_CR12","doi-asserted-by":"crossref","unstructured":"Zhu, X., Xiong, Y., Dai, J., Yuan, L., Wei, Y.: Deep feature flow for video recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2349\u20132358 (2017)","DOI":"10.1109\/CVPR.2017.441"},{"key":"1262_CR13","doi-asserted-by":"crossref","unstructured":"Paul, M., Danelljan, M., Van\u00a0Gool, L., Timofte, R.: Local memory attention for fast video semantic segmentation. In: 2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 1102\u20131109 (2021). IEEE","DOI":"10.1109\/IROS51168.2021.9636192"},{"key":"1262_CR14","doi-asserted-by":"crossref","unstructured":"Li, J., Wang, W., Chen, J., Niu, L., Si, J., Qian, C., Zhang, L.: Video semantic segmentation via sparse temporal transformer. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 59\u201368 (2021)","DOI":"10.1145\/3474085.3475409"},{"key":"1262_CR15","doi-asserted-by":"crossref","unstructured":"Gadde, R., Jampani, V., Gehler, P.V.: Semantic video cnns through representation warping. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4453\u20134462 (2017)","DOI":"10.1109\/ICCV.2017.477"},{"key":"1262_CR16","doi-asserted-by":"crossref","unstructured":"Jin, X., Li, X., Xiao, H., Shen, X., Lin, Z., Yang, J., Chen, Y., Dong, J., Liu, L., Jie, Z., et al.: Video scene parsing with predictive feature learning. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5580\u20135588 (2017)","DOI":"10.1109\/ICCV.2017.595"},{"key":"1262_CR17","doi-asserted-by":"crossref","unstructured":"Kundu, A., Vineet, V., Koltun, V.: Feature space optimization for semantic video segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3168\u20133175 (2016)","DOI":"10.1109\/CVPR.2016.345"},{"key":"1262_CR18","doi-asserted-by":"crossref","unstructured":"Nilsson, D., Sminchisescu, C.: Semantic video segmentation by gated recurrent flow propagation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6819\u20136828 (2018)","DOI":"10.1109\/CVPR.2018.00713"},{"key":"1262_CR19","doi-asserted-by":"crossref","unstructured":"Shelhamer, E., Rakelly, K., Hoffman, J., Darrell, T.: Clockwork convnets for video semantic segmentation. In: Computer Vision\u2013ECCV 2016 Workshops: Amsterdam, The Netherlands, October 8-10 and 15-16, 2016, Proceedings, Part III 14, pp. 852\u2013868 (2016). Springer","DOI":"10.1007\/978-3-319-49409-8_69"},{"key":"1262_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Y., Shen, C., Yu, C., Wang, J.: Efficient semantic video segmentation with per-frame inference. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part X 16, pp. 352\u2013368 (2020). Springer","DOI":"10.1007\/978-3-030-58607-2_21"},{"key":"1262_CR21","first-page":"16743","volume":"35","author":"L Lin","year":"2022","unstructured":"Lin, L., Fan, H., Zhang, Z., Xu, Y., Ling, H.: Swintrack: a simple and strong baseline for transformer tracking. Adv. Neural Inf. Process. Syst. 35, 16743\u201316754 (2022)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1262_CR22","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: herarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1262_CR23","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., Schiele, B.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"1262_CR24","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1262_CR25","doi-asserted-by":"crossref","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 801\u2013818 (2018)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"1262_CR26","doi-asserted-by":"crossref","unstructured":"Fu, J., Liu, J., Tian, H., Li, Y., Bao, Y., Fang, Z., Lu, H.: Dual attention network for scene segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3146\u20133154 (2019)","DOI":"10.1109\/CVPR.2019.00326"},{"key":"1262_CR27","doi-asserted-by":"crossref","unstructured":"Yu, C., Wang, J., Gao, C., Yu, G., Shen, C., Sang, N.: Context prior for scene segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12416\u201312425 (2020)","DOI":"10.1109\/CVPR42600.2020.01243"},{"issue":"4","key":"1262_CR28","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen, L.-C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Deeplab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1262_CR29","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Zheng, X., Ouyang, W., Li, B.: A strip dilated convolutional network for semantic segmentation. Neural Process. Lett. 1\u201321 (2022)","DOI":"10.1007\/s11063-022-11048-5"},{"key":"1262_CR30","unstructured":"Yu, F., Koltun, V.: Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 (2015)"},{"key":"1262_CR31","unstructured":"Chen, L.-C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587 (2017)"},{"key":"1262_CR32","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-Net: convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-Assisted Intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5\u20139, 2015, Proceedings, Part III 18, pp. 234\u2013241 (2015). Springer","DOI":"10.1007\/978-3-319-24574-4_28"},{"issue":"12","key":"1262_CR33","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan, V., Kendall, A., Cipolla, R.: SegNet: a deep convolutional encoder-decoder architecture for image segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 39(12), 2481\u20132495 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1262_CR34","doi-asserted-by":"crossref","unstructured":"Lin, G., Milan, A., Shen, C., Reid, I.: RefineNet: multi-path refinement networks for high-resolution semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1925\u20131934 (2017)","DOI":"10.1109\/CVPR.2017.549"},{"issue":"5","key":"1262_CR35","doi-asserted-by":"publisher","first-page":"1551","DOI":"10.1007\/s11263-021-01445-z","volume":"129","author":"R Mohan","year":"2021","unstructured":"Mohan, R., Valada, A.: EfficientPS: efficient panoptic segmentation. Int. J. Comput. Vis. 129(5), 1551\u20131579 (2021)","journal-title":"Int. J. Comput. Vis."},{"key":"1262_CR36","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16 \u00d7 16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1262_CR37","doi-asserted-by":"crossref","unstructured":"Sun, G., Liu, Y., Ding, H., Probst, T., Van\u00a0Gool, L.: Coarse-to-fine feature mining for video semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3126\u20133137 (2022)","DOI":"10.1109\/CVPR52688.2022.00313"},{"key":"1262_CR38","doi-asserted-by":"crossref","unstructured":"Jain, S., Wang, X., Gonzalez, J.E.: Accel: A corrective fusion network for efficient semantic segmentation on video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8866\u20138875 (2019)","DOI":"10.1109\/CVPR.2019.00907"},{"key":"1262_CR39","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1016\/j.neucom.2021.12.003","volume":"474","author":"J Liu","year":"2022","unstructured":"Liu, J., Xu, X., Shi, Y., Deng, C., Shi, M.: RelaxNet: residual efficient learning and attention expected fusion network for real-time semantic segmentation. Neurocomputing 474, 115\u2013127 (2022)","journal-title":"Neurocomputing"},{"key":"1262_CR40","doi-asserted-by":"crossref","unstructured":"Brostow, G.J., Shotton, J., Fauqueur, J., Cipolla, R.: Segmentation and recognition using structure from motion point clouds. In: Computer Vision\u2013ECCV 2008: 10th European Conference on Computer Vision, Marseille, France, October 12-18, 2008, Proceedings, Part I 10, pp. 44\u201357 (2008). Springer","DOI":"10.1007\/978-3-540-88682-2_5"},{"key":"1262_CR41","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009). IEEE","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1262_CR42","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.-Y., Feichtenhofer, C., Darrell, T., Xie, S.: A convNet for the 2020s. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11976\u201311986 (2022)","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"1262_CR43","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2881\u20132890 (2017)","DOI":"10.1109\/CVPR.2017.660"},{"key":"1262_CR44","doi-asserted-by":"crossref","unstructured":"Liu, Z., Ning, J., Cao, Y., Wei, Y., Zhang, Z., Lin, S., Hu, H.: Video swin transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3202\u20133211 (2022)","DOI":"10.1109\/CVPR52688.2022.00320"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01262-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01262-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01262-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T13:04:56Z","timestamp":1712927096000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01262-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,21]]},"references-count":44,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,4]]}},"alternative-id":["1262"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01262-7","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,21]]},"assertion":[{"value":"11 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 February 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"67"}}