{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:26:51Z","timestamp":1759332411256,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":23,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819785049"},{"type":"electronic","value":"9789819785056"}],"license":[{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8505-6_1","type":"book-chapter","created":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T22:02:53Z","timestamp":1730930573000},"page":"3-16","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Contextual Feature-Based Medical Visual Question Answering Aided by\u00a0Learnable Matrix"],"prefix":"10.1007","author":[{"given":"Cheng","family":"Gong","sequence":"first","affiliation":[]},{"given":"Haiwei","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Haiyan","family":"Lan","sequence":"additional","affiliation":[]},{"given":"Kejia","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Shuning","family":"He","sequence":"additional","affiliation":[]},{"given":"Xiteng","family":"Jia","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,7]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Antol, S., Agrawal, A., Lu, J., Mitchell, M., Batra, D., Zitnick, C.L., Parikh, D.: VQA: Visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"1_CR2","unstructured":"Hasan, S.A., Ling, Y., Farri, O., Liu, J., M\u00fcller, H., Lungren, M.P.: Overview of ImageCLEF 2018 medical domain visual question answering task. In: CLEF 2018 Working Notes (2018)"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Kovaleva, O., Shivade, C., Kashyap, S., Kanjaria, K., Wu, J., Ballah, D., Coy, A., Karargyris, A., Guo, Y., Beymer, D.B., et al.: Towards visual dialog for radiology. In: Proceedings of the 19th SIGBioMed Workshop on Biomedical Language Processing, pp. 60\u201369 (2020)","DOI":"10.18653\/v1\/2020.bionlp-1.6"},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Lin, Z., Zhang, D., Tao, Q., Shi, D., Haffari, G., Wu, Q., He, M., Ge, Z.: Medical visual question answering: a survey. Artif. Intell. Med., 102611 (2023)","DOI":"10.1016\/j.artmed.2023.102611"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Nguyen, B.D., Do, T.-T., Nguyen, B.X., Do, T., Tjiputra, E., Tran, Q.D.: Overcoming data limitation in medical visual question answering. In: Medical Image Computing and Computer Assisted Intervention\u2014MICCAI 2019: 22nd International Conference, Shenzhen, China, October 13-17, 2019. Proceedings, Part IV, vol. 22, pp. 522\u2013530. Springer (2019)","DOI":"10.1007\/978-3-030-32251-9_57"},{"key":"1_CR6","doi-asserted-by":"publisher","first-page":"3332","DOI":"10.1109\/TMI.2022.3185008","volume":"41","author":"H Gong","year":"2022","unstructured":"Gong, H., Chen, G., Mao, M., Li, Z., Li, G.: VQAMix: conditional triplet mixup for medical visual question answering. IEEE Trans. Med. Imaging 41, 3332\u20133343 (2022)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., Smola, A.: Stacked Attention Networks for Image Question Answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 21\u201329 (2016)","DOI":"10.1109\/CVPR.2016.10"},{"key":"1_CR8","unstructured":"Kim, J.-H., Jun, J., Zhang, B.-T.: Bilinear attention networks. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"Yu, Z., Yu, J., Cui, Y., Tao, D., Tian, Q.: Deep modular co-attention networks for visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6281\u20136290 (2019)","DOI":"10.1109\/CVPR.2019.00644"},{"key":"1_CR10","unstructured":"Chen, G., Gong, H., Li, G.: HCP-MIC at VQA-Med 2020: effective visual representation for medical visual question answering. In: CLEF 2020 Working Notes (2020)"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Do, T., Nguyen, B.X., Tjiputra, E., Tran, M., Tran, Q.D., Nguyen, A.: Multiple Meta-model quantifying for medical visual question answering. In: Medical Image Computing and Computer Assisted Intervention\u2014MICCAI 2021: 24th International Conference, Strasbourg, France, September 27-October 1, 2021, Proceedings, Part V, vol. 24, pp. 64\u201374. Springer (2021)","DOI":"10.1007\/978-3-030-87240-3_7"},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"Zhan, L.-M., Liu, B., Fan, L., Chen, J., Wu, X.-M.: Medical visual question answering via conditional reasoning. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 2345\u20132354 (2020)","DOI":"10.1145\/3394171.3413761"},{"key":"1_CR13","doi-asserted-by":"crossref","unstructured":"Chen, Z., Du, Y., Hu, J., Liu, Y., Li, G., Wan, X., Chang, T.-H.: Multi-modal masked autoencoders for medical vision-and-language pre-training. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 679\u2013689. Springer (2022)","DOI":"10.1007\/978-3-031-16443-9_65"},{"key":"1_CR14","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1_CR15","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"1_CR16","doi-asserted-by":"crossref","unstructured":"Pan, X., Ge, C., Lu, R., Song, S., Chen, G., Huang, Z., Huang, G.: On the integration of self-attention and convolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 815\u2013825 (2022)","DOI":"10.1109\/CVPR52688.2022.00089"},{"key":"1_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/sdata.2018.251","volume":"5","author":"JJ Lau","year":"2018","unstructured":"Lau, J.J., Gayen, S., Ben Abacha, A., Demner-Fushman, D.: A dataset of clinically generated visual questions and answers about radiology images. Scientific Data 5, 1\u201310 (2018)","journal-title":"Scientific Data"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Liu, B., Zhan, L.-M., Xu, L., Ma, L., Yang, Y., Wu, X.-M.: SLAKE: A semantically-labeled knowledge-enhanced dataset for medical visual question answering. In: 2021 IEEE 18th International Symposium on Biomedical Imaging (ISBI). IEEE, pp. 1650\u20131654 (2021)","DOI":"10.1109\/ISBI48211.2021.9434010"},{"key":"1_CR19","unstructured":"Foret, P., Kleiner, A., Mobahi, H., Neyshabur, B.: Sharpness-Aware Minimization for Efficiently Improving Generalization. arXiv preprint arXiv:2010.01412 (2020)"},{"key":"1_CR20","doi-asserted-by":"publisher","first-page":"3385","DOI":"10.1109\/TMI.2022.3185113","volume":"41","author":"F Cong","year":"2022","unstructured":"Cong, F., Xu, S., Guo, L., Tian, Y.: Anomaly matters: an anomaly-oriented model for medical visual question answering. IEEE Trans. Med. Imaging 41, 3385\u20133397 (2022)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"1_CR21","doi-asserted-by":"crossref","unstructured":"Cong, F., Xu, S., Guo, L., Tian, Y.: Caption-aware medical VQA via semantic focusing and progressive cross-modality comprehension. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 3569\u20133577 (2022)","DOI":"10.1145\/3503161.3548122"},{"key":"1_CR22","doi-asserted-by":"crossref","unstructured":"Liu, B., Zhan, L.-M., Wu, X.-M.: Contrastive pre-training and representation distillation for medical visual question answering based on radiology images. In: Medical Image Computing and Computer Assisted Intervention\u2014MICCAI 2021: 24th International Conference, Strasbourg, France, September 27-October 1, 2021, Proceedings, Part II, vol. 24, pp. 210\u2013220. Springer (2021)","DOI":"10.1007\/978-3-030-87196-3_20"},{"key":"1_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, A., Tao, W., Li, Z., Wang, H., Zhang, W.: Type-aware medical visual question answering. In: ICASSP 2022\u20142022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4838\u20134842. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9747087"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8505-6_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T22:03:44Z","timestamp":1730930624000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8505-6_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,7]]},"ISBN":["9789819785049","9789819785056"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8505-6_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,7]]},"assertion":[{"value":"7 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}