{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T06:56:36Z","timestamp":1769064996202,"version":"3.49.0"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,2,29]],"date-time":"2024-02-29T00:00:00Z","timestamp":1709164800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,29]],"date-time":"2024-02-29T00:00:00Z","timestamp":1709164800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["U22A2033"],"award-info":[{"award-number":["U22A2033"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62206242"],"award-info":[{"award-number":["62206242"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"National Natural Science Foundation Regional Innovation and Development Joint Fund","award":["U20A20386"],"award-info":[{"award-number":["U20A20386"]}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFE0210700"],"award-info":[{"award-number":["2022YFE0210700"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Zhejiang Key Research and Development Program","award":["2023C03194"],"award-info":[{"award-number":["2023C03194"]}]},{"name":"Applied Research of Public Welfare Technology of Zhejiang Province","award":["LGF22H120007"],"award-info":[{"award-number":["LGF22H120007"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1007\/s00530-024-01260-9","type":"journal-article","created":{"date-parts":[[2024,2,29]],"date-time":"2024-02-29T16:02:28Z","timestamp":1709222548000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Mmy-net: a multimodal network exploiting image and patient metadata for simultaneous segmentation and diagnosis"],"prefix":"10.1007","volume":"30","author":[{"given":"Renshu","family":"Gu","sequence":"first","affiliation":[]},{"given":"Yueyu","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Lisha","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Dechao","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yaqi","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Ruiquan","family":"Ge","sequence":"additional","affiliation":[]},{"given":"Zicheng","family":"Jiao","sequence":"additional","affiliation":[]},{"given":"Juan","family":"Ye","sequence":"additional","affiliation":[]},{"given":"Gangyong","family":"Jia","sequence":"additional","affiliation":[]},{"given":"Linyan","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,29]]},"reference":[{"key":"1260_CR1","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1016\/j.ins.2017.01.011","volume":"385","author":"X Yang","year":"2017","unstructured":"Yang, X., Liu, W., Tao, D., Cheng, J.: Canonical correlation analysis networks for two-view image recognition. Inf. Sci. 385, 338\u2013352 (2017)","journal-title":"Inf. Sci."},{"issue":"8","key":"1260_CR2","doi-asserted-by":"publisher","first-page":"2927","DOI":"10.1109\/TCYB.2018.2833843","volume":"49","author":"W Liu","year":"2018","unstructured":"Liu, W., Ma, X., Zhou, Y., Tao, D., Cheng, J.: $$p$$-laplacian regularization for scene recognition. IEEE Trans. Cybern. 49(8), 2927\u20132940 (2018)","journal-title":"IEEE Trans. Cybern."},{"key":"1260_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107658","volume":"110","author":"W Liu","year":"2021","unstructured":"Liu, W., Li, J., Liu, B., Guan, W., Zhou, Y., Xu, C.: Unified cross-domain classification via geometric and statistical adaptations. Pattern Recogn. 110, 107658 (2021)","journal-title":"Pattern Recogn."},{"issue":"11","key":"1260_CR4","doi-asserted-by":"publisher","first-page":"8082","DOI":"10.1109\/TPAMI.2021.3083269","volume":"44","author":"B Zhang","year":"2021","unstructured":"Zhang, B., Xiao, J., Jiao, J., Wei, Y., Zhao, Y.: Affinity attention graph neural network for weakly supervised semantic segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 44(11), 8082\u20138096 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1260_CR5","doi-asserted-by":"crossref","unstructured":"Zhang, B., Xiao, J., Wei, Y., Zhao, Y.: Credible dual-expert learning for weakly supervised semantic segmentation. Int. J. Comput. Vis., 1\u201317 (2023)","DOI":"10.1007\/s11263-023-01796-9"},{"key":"1260_CR6","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1260_CR7","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation. In: International Conference on Medical Image Computing and Computer-assisted Intervention, pp. 234\u2013241. Springer (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"issue":"4","key":"1260_CR8","doi-asserted-by":"publisher","first-page":"1239","DOI":"10.1007\/s00530-021-00840-3","volume":"28","author":"RL Ara\u00fajo","year":"2022","unstructured":"Ara\u00fajo, R.L., Ara\u00fajo, F.H.D., Silva, R.R.E.: Automatic segmentation of melanoma skin cancer using transfer learning and fine-tuning. Multimedia Syst. 28(4), 1239\u20131250 (2022)","journal-title":"Multimedia Syst."},{"issue":"3","key":"1260_CR9","doi-asserted-by":"publisher","first-page":"1586","DOI":"10.1002\/mp.16072","volume":"50","author":"W Xu","year":"2023","unstructured":"Xu, W., Bian, Y., Lu, Y., Meng, Q., Zhu, W., Shi, F., Chen, X., Shao, C., Xiang, D.: Semi-supervised interactive fusion network for mr image segmentation. Med. Phys. 50(3), 1586\u20131600 (2023)","journal-title":"Med. Phys."},{"key":"1260_CR10","doi-asserted-by":"crossref","unstructured":"Huang, H., Lin, L., Tong, R., Hu, H., Zhang, Q., Iwamoto, Y., Han, X., Chen, Y.-W., Wu, J.: Unet 3+: a full-scale connected unet for medical image segmentation. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1055\u20131059 (2020). IEEE","DOI":"10.1109\/ICASSP40776.2020.9053405"},{"key":"1260_CR11","unstructured":"Huang, L., Yuan, Y., Guo, J., Zhang, C., Chen, X., Wang, J.: Interlaced sparse self-attention for semantic segmentation (2019). arXiv preprint arXiv:1907.12273"},{"key":"1260_CR12","doi-asserted-by":"publisher","first-page":"489","DOI":"10.1016\/j.media.2016.08.008","volume":"35","author":"K Sirinukunwattana","year":"2017","unstructured":"Sirinukunwattana, K., Pluim, J.P., Chen, H., Qi, X., Heng, P.-A., Guo, Y.B., Wang, L.Y., Matuszewski, B.J., Bruni, E., Sanchez, U., et al.: Gland segmentation in colon histology images: the glas challenge contest. Med. Image Anal. 35, 489\u2013502 (2017)","journal-title":"Med. Image Anal."},{"issue":"4","key":"1260_CR13","doi-asserted-by":"publisher","first-page":"1185","DOI":"10.1109\/JBHI.2020.3015844","volume":"25","author":"Z Wen","year":"2021","unstructured":"Wen, Z., Feng, R., Liu, J., Li, Y., Ying, S.: Gcsba-net: gabor-based and cascade squeeze bi-attention network for gland segmentation. IEEE J. Biomed. Health Inform. 25(4), 1185\u20131196 (2021). https:\/\/doi.org\/10.1109\/JBHI.2020.3015844","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"1260_CR14","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Rahman\u00a0Siddiquee, M.M., Tajbakhsh, N., Liang, J.: Unet++: a nested u-net architecture for medical image segmentation. In: Deep Learning in Medical Image Analysis and Multimodal Learning for Clinical Decision Support, pp. 3\u201311. Springer, Berlin (2018)","DOI":"10.1007\/978-3-030-00889-5_1"},{"key":"1260_CR15","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1016\/j.isprsjprs.2020.01.013","volume":"162","author":"FI Diakogiannis","year":"2020","unstructured":"Diakogiannis, F.I., Waldner, F., Caccetta, P., Wu, C.: Resunet-a: a deep learning framework for semantic segmentation of remotely sensed data. ISPRS J. Photogramm. Remote. Sens. 162, 94\u2013114 (2020)","journal-title":"ISPRS J. Photogramm. Remote. Sens."},{"key":"1260_CR16","unstructured":"Oktay, O., Schlemper, J., Folgoc, L.L., Lee, M., Heinrich, M., Misawa, K., Mori, K., McDonagh, S., Hammerla, N.Y., Kainz, B., et al.: Attention u-net: learning where to look for the pancreas (2018). arXiv preprint arXiv:1804.03999"},{"key":"1260_CR17","doi-asserted-by":"crossref","unstructured":"Alom, M.Z., Hasan, M., Yakopcic, C., Taha, T.M., Asari, V.K.: Recurrent residual convolutional neural network based on u-net (r2u-net) for medical image segmentation (2018). arXiv preprint arXiv:1802.06955","DOI":"10.1109\/NAECON.2018.8556686"},{"issue":"2","key":"1260_CR18","doi-asserted-by":"publisher","first-page":"568","DOI":"10.1109\/JBHI.2019.2912935","volume":"24","author":"S Guan","year":"2019","unstructured":"Guan, S., Khan, A.A., Sikdar, S., Chitnis, P.V.: Fully dense unet for 2-d sparse photoacoustic tomography artifact removal. IEEE J. Biomed. Health Inform. 24(2), 568\u2013576 (2019)","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"1260_CR19","doi-asserted-by":"crossref","unstructured":"Mehta, S., Mercan, E., Bartlett, J., Weaver, D., Elmore, J.G., Shapiro, L.: Y-net: joint segmentation and classification for diagnosis of breast biopsy images. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 893\u2013901 (2018). Springer","DOI":"10.1007\/978-3-030-00934-2_99"},{"key":"1260_CR20","unstructured":"Cao, H., Wang, Y., Chen, J., Jiang, D., Zhang, X., Tian, Q., Wang, M.: Swin-unet: Unet-like pure transformer for medical image segmentation (2021). arXiv preprint arXiv:2105.05537"},{"key":"1260_CR21","unstructured":"Chen, J., Lu, Y., Yu, Q., Luo, X., Adeli, E., Wang, Y., Lu, L., Yuille, A.L., Zhou, Y.: Transunet: Transformers make strong encoders for medical image segmentation (2021). arXiv preprint arXiv:2102.04306"},{"key":"1260_CR22","doi-asserted-by":"crossref","unstructured":"Song, Q., Li, J., Li, C., Guo, H., Huang, R.: Fully attentional network for semantic segmentation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 2280\u20132288 (2022)","DOI":"10.1609\/aaai.v36i2.20126"},{"key":"1260_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, W., Huang, Z., Luo, G., Chen, T., Wang, X., Liu, W., Yu, G., Shen, C.: Topformer: token pyramid transformer for mobile semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12083\u201312093 (2022)","DOI":"10.1109\/CVPR52688.2022.01177"},{"issue":"10","key":"1260_CR24","doi-asserted-by":"publisher","first-page":"1993","DOI":"10.1109\/TMI.2014.2377694","volume":"34","author":"BH Menze","year":"2014","unstructured":"Menze, B.H., Jakab, A., Bauer, S., Kalpathy-Cramer, J., Farahani, K., Kirby, J., Burren, Y., Porz, N., Slotboom, J., Wiest, R., et al.: The multimodal brain tumor image segmentation benchmark (brats). IEEE Trans. Med. Imaging 34(10), 1993\u20132024 (2014)","journal-title":"IEEE Trans. Med. Imaging"},{"issue":"5","key":"1260_CR25","doi-asserted-by":"publisher","first-page":"1116","DOI":"10.1109\/TMI.2018.2878669","volume":"38","author":"J Dolz","year":"2018","unstructured":"Dolz, J., Gopinath, K., Yuan, J., Lombaert, H., Desrosiers, C., Ayed, I.B.: Hyperdense-net: a hyper-densely connected cnn for multi-modal image segmentation. IEEE Trans. Med. Imaging 38(5), 1116\u20131126 (2018)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"1260_CR26","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1016\/j.neunet.2019.08.025","volume":"121","author":"N Ibtehaz","year":"2020","unstructured":"Ibtehaz, N., Rahman, M.S.: Multiresunet: rethinking the u-net architecture for multimodal biomedical image segmentation. Neural Netw. 121, 74\u201387 (2020)","journal-title":"Neural Netw."},{"key":"1260_CR27","doi-asserted-by":"crossref","unstructured":"Yan, K., Tang, Y., Peng, Y., Sandfort, V., Bagheri, M., Lu, Z., Summers, R.M.: Mulan: multitask universal lesion analysis network for joint lesion detection, tagging, and segmentation. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 194\u2013202 (2019). Springer","DOI":"10.1007\/978-3-030-32226-7_22"},{"key":"1260_CR28","doi-asserted-by":"crossref","unstructured":"Yan, K., Peng, Y., Sandfort, V., Bagheri, M., Lu, Z., Summers, R.M.: Holistic and comprehensive annotation of clinically significant findings on diverse ct images: learning from radiology reports and label ontology. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8523\u20138532 (2019)","DOI":"10.1109\/CVPR.2019.00872"},{"key":"1260_CR29","doi-asserted-by":"crossref","unstructured":"Wang, P., Chung, A.: Doubleu-net: colorectal cancer diagnosis and gland instance segmentation with text-guided feature control. In: European Conference on Computer Vision, pp. 338\u2013354 (2020). Springer","DOI":"10.1007\/978-3-030-66415-2_22"},{"key":"1260_CR30","doi-asserted-by":"crossref","unstructured":"Xiao, T., Zheng, H., Wang, X., Chen, X., Chang, J., Yao, J., Shang, H., Liu, P.: Intracerebral haemorrhage growth prediction based on displacement vector field and clinical metadata. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 741\u2013751 (2021). Springer","DOI":"10.1007\/978-3-030-87240-3_71"},{"key":"1260_CR31","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1016\/j.ejca.2021.02.032","volume":"149","author":"J H\u00f6hn","year":"2021","unstructured":"H\u00f6hn, J., Krieghoff-Henning, E., Jutzi, T.B., Kalle, C., Utikal, J.S., Meier, F., Gellrich, F.F., Hobelsberger, S., Hauschild, A., Schlager, J.G., et al.: Combining cnn-based histologic whole slide image analysis and patient data to improve skin cancer classification. Eur. J. Cancer 149, 94\u2013101 (2021)","journal-title":"Eur. J. Cancer"},{"key":"1260_CR32","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding (2018). arXiv preprint arXiv:1810.04805"},{"issue":"8","key":"1260_CR33","doi-asserted-by":"publisher","first-page":"1962","DOI":"10.1109\/TMI.2016.2529665","volume":"35","author":"A Vahadane","year":"2016","unstructured":"Vahadane, A., Peng, T., Sethi, A., Albarqouni, S., Wang, L., Baust, M., Steiger, K., Schlitter, A.M., Esposito, I., Navab, N.: Structure-preserving color normalization and sparse stain separation for histological images. IEEE Trans. Med. Imaging 35(8), 1962\u20131971 (2016)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"1260_CR34","doi-asserted-by":"crossref","unstructured":"Berman, M., Triki, A.R., Blaschko, M.B.: The lov\u00e1sz-softmax loss: a tractable surrogate for the optimization of the intersection-over-union measure in neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4413\u20134421 (2018)","DOI":"10.1109\/CVPR.2018.00464"},{"key":"1260_CR35","doi-asserted-by":"crossref","unstructured":"Wang, H., Cao, P., Wang, J., Zaiane, O.R.: Uctransnet: rethinking the skip connections in u-net from a channel-wise perspective with transformer (2021). arXiv preprint arXiv:2109.04335","DOI":"10.1609\/aaai.v36i3.20144"},{"key":"1260_CR36","doi-asserted-by":"crossref","unstructured":"Valanarasu, J.M.J., Oza, P., Hacihaliloglu, I., Patel, V.M.: Medical transformer: gated axial-attention for medical image segmentation. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 36\u201346 (2021). Springer","DOI":"10.1007\/978-3-030-87193-2_4"},{"key":"1260_CR37","doi-asserted-by":"crossref","unstructured":"Wazir, S., Fraz, M.M.: Histoseg: Quick attention with multi-loss function for multi-structure segmentation in digital histology images. In: 2022 12th International Conference on Pattern Recognition Systems (ICPRS), pp. 1\u20137 (2022). IEEE","DOI":"10.1109\/ICPRS54038.2022.9854067"},{"key":"1260_CR38","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1260_CR39","doi-asserted-by":"crossref","unstructured":"Peters, M.E., Neumann, M., Iyyer, M., Gardner, M., Clark, C., Lee, K., Zettlemoyer, L.: Deep contextualized word representations. In: NAACL (2018)","DOI":"10.18653\/v1\/N18-1202"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01260-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01260-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01260-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T13:07:39Z","timestamp":1712927259000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01260-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,29]]},"references-count":39,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,4]]}},"alternative-id":["1260"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01260-9","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,29]]},"assertion":[{"value":"30 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 February 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"72"}}