{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T17:21:27Z","timestamp":1770830487177,"version":"3.50.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T00:00:00Z","timestamp":1732579200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T00:00:00Z","timestamp":1732579200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100013317","name":"Shanxi Provincial Key Research and Development Project","doi-asserted-by":"publisher","award":["202202020101007"],"award-info":[{"award-number":["202202020101007"]}],"id":[{"id":"10.13039\/501100013317","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013317","name":"Shanxi Provincial Key Research and Development Project","doi-asserted-by":"publisher","award":["202202020101007"],"award-info":[{"award-number":["202202020101007"]}],"id":[{"id":"10.13039\/501100013317","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013317","name":"Shanxi Provincial Key Research and Development Project","doi-asserted-by":"publisher","award":["202202020101007"],"award-info":[{"award-number":["202202020101007"]}],"id":[{"id":"10.13039\/501100013317","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013317","name":"Shanxi Provincial Key Research and Development Project","doi-asserted-by":"publisher","award":["202202020101007"],"award-info":[{"award-number":["202202020101007"]}],"id":[{"id":"10.13039\/501100013317","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013317","name":"Shanxi Provincial Key Research and Development Project","doi-asserted-by":"publisher","award":["202202020101007"],"award-info":[{"award-number":["202202020101007"]}],"id":[{"id":"10.13039\/501100013317","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shanxi Province Key Research and Development Project","award":["202102020101004"],"award-info":[{"award-number":["202102020101004"]}]},{"name":"Shanxi Province Key Research and Development Project","award":["202102020101004"],"award-info":[{"award-number":["202102020101004"]}]},{"name":"Shanxi Province Key Research and Development Project","award":["202102020101004"],"award-info":[{"award-number":["202102020101004"]}]},{"name":"Shanxi Province Key Research and Development Project","award":["202102020101004"],"award-info":[{"award-number":["202102020101004"]}]},{"name":"Shanxi Province Key Research and Development Project","award":["202102020101004"],"award-info":[{"award-number":["202102020101004"]}]},{"name":"Fundamental Research Programs of Shanxi Province","award":["20210302124168"],"award-info":[{"award-number":["20210302124168"]}]},{"name":"Fundamental Research Programs of Shanxi Province","award":["20210302124168"],"award-info":[{"award-number":["20210302124168"]}]},{"name":"Fundamental Research Programs of Shanxi Province","award":["20210302124168"],"award-info":[{"award-number":["20210302124168"]}]},{"name":"Fundamental Research Programs of Shanxi Province","award":["20210302124168"],"award-info":[{"award-number":["20210302124168"]}]},{"name":"Fundamental Research Programs of Shanxi Province","award":["20210302124168"],"award-info":[{"award-number":["20210302124168"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s00530-024-01497-4","type":"journal-article","created":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T09:27:20Z","timestamp":1732613240000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["EfficientFusion: simple and efficient learning with pixel-level fusion for semantic segmentation"],"prefix":"10.1007","volume":"30","author":[{"given":"Ping","family":"Liu","sequence":"first","affiliation":[]},{"given":"Shuaijie","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Yuting","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Shufeng","family":"Hao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,26]]},"reference":[{"key":"1497_CR1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-16782-z","author":"W Depeng","year":"2023","unstructured":"Depeng, W., Huabin, W.: MFFLNet: lightweight semantic segmentation network based on multi-scale feature fusion. Multimed. Tools Appl. (2023). https:\/\/doi.org\/10.1007\/s11042-023-16782-z","journal-title":"Multimed. Tools Appl."},{"key":"1497_CR2","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/s13735-017-0141-z","volume":"7","author":"Y Guo","year":"2018","unstructured":"Guo, Y., Liu, Y., Georgiou, T., et al.: A review of semantic segmentation using deep neural networks. Int J Multim Inform Retr. 7, 87\u201393 (2018)","journal-title":"Int J Multim Inform Retr."},{"key":"1497_CR3","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1016\/j.neucom.2018.03.037","volume":"304","author":"H Yu","year":"2018","unstructured":"Yu, H., Yang, Z., Tan, L., et al.: Methods and datasets on semantic segmentation: a review. Neurocomputing 304, 82\u2013103 (2018)","journal-title":"Neurocomputing"},{"issue":"3","key":"1497_CR4","doi-asserted-by":"publisher","first-page":"1341","DOI":"10.1109\/TITS.2020.2972974","volume":"22","author":"D Feng","year":"2020","unstructured":"Feng, D., Haase-Sch\u00fctz, C., Rosenbaum, L., et al.: Deep multi-modal object detection and semantic segmentation for autonomous driving: datasets, methods, and challenges. IEEE Trans. Intell. Transp. Syst. 22(3), 1341\u20131360 (2020)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"1497_CR5","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1007\/978-3-030-28954-6_15","volume-title":"Explainable AI interpreting, explaining and visualizing deep learning","author":"M Hofmarcher","year":"2019","unstructured":"Hofmarcher, M., Unterthiner, T., Arjona-Medina, J., et al.: Visual scene understanding for autonomous driving using semantic segmentation. In: Samek, W., Montavon, G., et al. (eds.) Explainable AI interpreting, explaining and visualizing deep learning, pp. 285\u2013296. Springer, Cham (2019)"},{"issue":"3","key":"1497_CR6","first-page":"2487","volume":"36","author":"L Wang","year":"2022","unstructured":"Wang, L., Li, D., Liu, H., et al.: Cross-dataset collaborative learning for semantic segmentation in autonomous driving. Proceed. AAAI Conf. Artif. Intell. 36(3), 2487\u20132494 (2022)","journal-title":"Proceed. AAAI Conf. Artif. Intell."},{"issue":"11","key":"1497_CR7","doi-asserted-by":"publisher","first-page":"2453","DOI":"10.1109\/TMI.2018.2835303","volume":"37","author":"L Chen","year":"2018","unstructured":"Chen, L., Bentley, P., Mori, K., et al.: DRINet for medical image segmentation. IEEE Trans. Med. Imaging 37(11), 2453\u20132462 (2018)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"1497_CR8","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Rahman Siddiquee, M.M., Tajbakhsh, N., et al.: Unet++: A nested u-net architecture for medical image segmentation[C]\/\/Deep Learning in Medical Image Analysis and Multimodal Learning for Clinical Decision Support: 4th International Workshop, DLMIA 2018, and 8th International Workshop, ML-CDS 2018, Held in Conjunction with MICCAI 2018, Granada, Spain, September 20, 2018, Proceedings 4. Springer International Publishing 3-11 (2018)","DOI":"10.1007\/978-3-030-00889-5_1"},{"key":"1497_CR9","doi-asserted-by":"crossref","unstructured":"Hatamizadeh, A., Tang, Y., Nath, V., et al.: Unetr: Transformers for 3d medical image segmentation. Proceedings of the IEEE\/CVF winter conference on applications of computer vision. 574\u2013584 (2022)","DOI":"10.1109\/WACV51458.2022.00181"},{"key":"1497_CR10","doi-asserted-by":"crossref","unstructured":"Chen, Y., Li, W., Van Gool, L.: Road: reality oriented adaptation for semantic segmentation of urban scenes. Proceedings of the IEEE conference on computer vision and pattern recognition. 7892\u20137901 (2018)","DOI":"10.1109\/CVPR.2018.00823"},{"issue":"4","key":"1497_CR11","doi-asserted-by":"publisher","first-page":"808","DOI":"10.3390\/rs13040808","volume":"13","author":"B Neupane","year":"2021","unstructured":"Neupane, B., Horanont, T., Aryal, J.: Deep learning-based semantic segmentation of urban features in satellite images: a review and meta-analysis. Remote Sens. 13(4), 808 (2021)","journal-title":"Remote Sens."},{"issue":"16","key":"1497_CR12","doi-asserted-by":"publisher","first-page":"3087","DOI":"10.3390\/rs13163087","volume":"13","author":"S Seong","year":"2021","unstructured":"Seong, S., Choi, J.: Semantic segmentation of urban buildings using a high-resolution network (HRNet) with channel and spatial attention gates. Remote Sens. 13(16), 3087 (2021)","journal-title":"Remote Sens."},{"key":"1497_CR13","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. Proceedings of the IEEE conference on computer vision and pattern recognition. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1497_CR14","unstructured":"Yu, F., Koltun, V.: Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 (2015)"},{"key":"1497_CR15","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., et al.: Semantic image segmentation with deep convolutional nets and fully connected crfs. arXiv preprint arXiv:1412.7062 (2014)"},{"issue":"4","key":"1497_CR16","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2017","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., et al.: Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1497_CR17","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., et al.: Attention is all you need. Adv. Neural Inform. Process. Syst., 30 (2017)"},{"key":"1497_CR18","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1497_CR19","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., et al.: SegFormer: Simple and efficient design for semantic segmentation with transformers. Adv. Neural. Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1497_CR20","first-page":"13937","volume":"34","author":"Y Rao","year":"2021","unstructured":"Rao, Y., Zhao, W., Liu, B., et al.: Dynamicvit: Efficient vision transformers with dynamic token sparsification[J]. Adv. Neural. Inf. Process. Syst. 34, 13937\u201313949 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1497_CR21","unstructured":"Bolya, D., Fu, C.Y., Dai, X., et al.: Token merging: Your vit but faster[J]. arXiv preprint arXiv:2210.09461 (2022)"},{"key":"1497_CR22","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation[C]\/\/Medical Image Computing and Computer-Assisted Intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing: 234-241 (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"issue":"12","key":"1497_CR23","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan, V., Kendall, A., Cipolla, R.: Segnet: A deep convolutional encoder-decoder architecture for image segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 39(12), 2481\u20132495 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1497_CR24","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., et al.: Pyramid scene parsing network[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition. 2881\u20132890 (2017)","DOI":"10.1109\/CVPR.2017.660"},{"key":"1497_CR25","doi-asserted-by":"crossref","unstructured":"Zheng, S., Lu, J., Zhao, H., et al.: Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 6881\u20136890 (2021)","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"1497_CR26","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., et al.: Swin transformer: Hierarchical vision transformer using shifted windows. Proceedings of the IEEE\/CVF international conference on computer vision. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1497_CR27","unstructured":"Wang, Y., Du, B., Xu, C.: Multi-tailed vision transformer for efficient inference. arXiv preprint arXiv:2203.01587 (2022)"},{"key":"1497_CR28","doi-asserted-by":"crossref","unstructured":"Tang, Y., Han, K., Wang, Y., et al.: Patch slimming for efficient vision transformers. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 12165\u201312174 (2022)","DOI":"10.1109\/CVPR52688.2022.01185"},{"key":"1497_CR29","unstructured":"Touvron, H., Cord, M., Douze, M., et al.: Training data-efficient image transformers & distillation through attention. International conference on machine learning. PMLR, 10347\u201310357 (2021)"},{"key":"1497_CR30","first-page":"213","volume-title":"European conference on computer vision","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., et al.: End-to-end object detection with transformers. In: European conference on computer vision, pp. 213\u2013229. Springer International Publishing, Cham (2020)"},{"key":"1497_CR31","doi-asserted-by":"crossref","unstructured":"Chen, H., Wang, Y., Guo, T., et al.: Pre-trained image processing transformer. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 12299\u201312310 (2021)","DOI":"10.1109\/CVPR46437.2021.01212"},{"key":"1497_CR32","doi-asserted-by":"crossref","unstructured":"He, S., Luo, H., Wang, P., et al.: Transreid: Transformer-based object re-identification. Proceedings of the IEEE\/CVF international conference on computer vision. 15013\u201315022 (2021)","DOI":"10.1109\/ICCV48922.2021.01474"},{"key":"1497_CR33","doi-asserted-by":"crossref","unstructured":"Caesar, H., Uijlings, J., Ferrari, V.: Coco-stuff: thing and stuff classes in context. Proceedings of the IEEE conference on computer vision and pattern recognition. 1209\u20131218 (2018)","DOI":"10.1109\/CVPR.2018.00132"},{"key":"1497_CR34","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., et al.: Microsoft coco: Common objects in context. Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13. Springer International Publishing 740\u2013755 (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1497_CR35","unstructured":"Liang, J., Cui, Y., Wang, Q., et al.: Clusterfomer: clustering as a universal visual learner. Adv. Neural Inform. Process. Syst. 36 (2024)"},{"key":"1497_CR36","unstructured":"Liang, J., Zhou, T., Liu, D., et al.: Clustseg: Clustering for universal segmentation. arxiv preprint. arxiv:2305.02187, (2023)"},{"key":"1497_CR37","unstructured":"Dongfang, L., et al.: Sg-net: spatial granularity network for one-stage video instance segmentation. In:\u00a0Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2021. p. 9816\u20139825 (2021)"},{"key":"1497_CR38","unstructured":"Wenguan, W. et al.: Visual recognition with deep nearest centroids.\u00a0arXiv preprint arXiv:2209.07383 (2022)"},{"key":"1497_CR39","first-page":"12826","volume":"35","author":"W Wang","year":"2022","unstructured":"Wang, W., Liang, J., Liu, D.: Learning equivariant segmentation with instance-unique querying. Adv. Neural Inform. Process. Syst. 35, 12826\u201312840 (2022)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"1497_CR40","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/s11263-018-1140-0","volume":"127","author":"B Zhou","year":"2019","unstructured":"Zhou, B., Zhao, H., Puig, X., et al.: Semantic understanding of scenes through the ade20k dataset. Int. J. Comput. Vision 127, 302\u2013321 (2019)","journal-title":"Int. J. Comput. Vision"},{"key":"1497_CR41","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., et al.: The cityscapes dataset for semantic urban scene understanding[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"1497_CR42","unstructured":"Bonnaerens, M., Dambre, J.: Learned thresholds token merging and pruning for vision transformers[J]. arxiv preprint arxiv:2307.10780 (2023)"},{"key":"1497_CR43","unstructured":"Heo, J.H., Azizi, S., Fayyazi, A. et al.: Training-free acceleration of ViTs with delayed spatial merging[C]\/\/Workshop on Efficient Systems for Foundation Models II@ ICML2024"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01497-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01497-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01497-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T09:20:19Z","timestamp":1734340819000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01497-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,26]]},"references-count":43,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["1497"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01497-4","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,26]]},"assertion":[{"value":"18 December 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 September 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}],"article-number":"364"}}