{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T23:28:02Z","timestamp":1778801282019,"version":"3.51.4"},"reference-count":48,"publisher":"Informa UK Limited","issue":"5","funder":[{"name":"Jouf University","award":["DSR-2021-02-0110"],"award-info":[{"award-number":["DSR-2021-02-0110"]}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["Cybernetics and Systems"],"published-print":{"date-parts":[[2022,7,4]]},"DOI":"10.1080\/01969722.2021.2018543","type":"journal-article","created":{"date-parts":[[2021,12,29]],"date-time":"2021-12-29T04:03:36Z","timestamp":1640750616000},"page":"403-424","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":24,"title":["Optimal Deep Neural Network-Based Model for Answering Visual Medical Question"],"prefix":"10.1080","volume":"53","author":[{"given":"Karim","family":"Gasmi","sequence":"first","affiliation":[{"name":"Department of Computer Science, College of Arts and Sciences at Tabarjal, Jouf University, Jouf, Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ibtihel Ben","family":"Ltaifa","sequence":"additional","affiliation":[{"name":"STIH, Sorbonne Universit\u00e9, Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ga\u00ebl","family":"Lejeune","sequence":"additional","affiliation":[{"name":"STIH, Sorbonne Universit\u00e9, Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hamoud","family":"Alshammari","sequence":"additional","affiliation":[{"name":"Department of Information Systems, College of Computer and Information Sciences, Jouf University, Jouf, Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lassaad Ben","family":"Ammar","sequence":"additional","affiliation":[{"name":"College of Sciences and Humanities, Prince Sattam bin Abdulaziz University, Al-Kharj, Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mahmood A.","family":"Mahmood","sequence":"additional","affiliation":[{"name":"Department of Information Systems, College of Computer and Information Sciences, Jouf University, Jouf, Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"301","published-online":{"date-parts":[[2021,12,28]]},"reference":[{"key":"CIT0001","unstructured":"Abacha, A. B., S. A. Hasan, V. Datla, J. Liu, D. Demner-Fushman, and H. M\u00a8Uller. Vqa-med: Overview of the medical visual question answering task at imageclef 2019, in CLEF, 2019."},{"key":"CIT0002","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0966-6"},{"key":"CIT0003","unstructured":"Al-Sadi, A., H. Al-Theiabat, and M. Al-Ayyoub. The inception team at vqa-med 2020: Pretrained vgg with data augmentation for medical vqa and vqg. CLEF, 2020."},{"key":"CIT0004","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-3001"},{"key":"CIT0005","unstructured":"Chen, G., H. Gong, and G. Li. Hcp-mic at vqa-med 2020: Effective visual representation for medical visual question answering, in CLEF, 2020."},{"key":"CIT0006","doi-asserted-by":"publisher","DOI":"10.1145\/2600428.2609628"},{"key":"CIT0007","unstructured":"Devlin, J., M.W. Chang, K. Lee, and K. Toutanova. Bert: Pre-training of deep bidirectional transformers for language understanding, in NAACL, 2019."},{"key":"CIT0008","unstructured":"Emilio, G.G. Artificial intelligence in medicine and healthcare: applications, availability and societal impact, 2020."},{"key":"CIT0009","doi-asserted-by":"publisher","DOI":"10.17265\/2159-5313\/2016.09.003"},{"key":"CIT0010","unstructured":"Gao, H., J. Mao, J. Zhou, Z. Huang, L. Wang, and W. Xu. Are you talking to a machine? dataset and methods for multilingual image question, NIPS, 2015."},{"key":"CIT0011","doi-asserted-by":"publisher","DOI":"10.1109\/ICICS52457.2021.9464540"},{"key":"CIT0012","doi-asserted-by":"publisher","DOI":"10.3390\/s20174897"},{"key":"CIT0013","doi-asserted-by":"publisher","DOI":"10.1145\/3042064"},{"key":"CIT0014","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58219-7_22"},{"key":"CIT0015","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.538"},{"key":"CIT0016","unstructured":"Kim, J.H., J. Jun, and B.T. Zhang. Bilinear attention networks, in NeurIPS, 2018."},{"key":"CIT0017","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"CIT0018","doi-asserted-by":"publisher","DOI":"10.3233\/SW-140134"},{"key":"CIT0019","doi-asserted-by":"publisher","DOI":"10.17265\/2159-5313\/2016.09.003"},{"key":"CIT0020","unstructured":"Liao, Z., Q. Wu, C. Shen, A. van den Hengel, and J. W. Verjans. Aiml at vqa-med 2020: Knowledge inference via a skeleton-based sentence mapping approach for medical domain visual question answering, CLEF, 2020."},{"key":"CIT0021","doi-asserted-by":"publisher","DOI":"10.17265\/2159-5313\/2016.09.003"},{"key":"CIT0022","doi-asserted-by":"publisher","DOI":"10.17265\/2159-5313\/2016.09.003"},{"key":"CIT0023","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.9"},{"key":"CIT0024","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00331"},{"key":"CIT0025","unstructured":"Mohamed, S. S. N., and K. Srinivasan. Imageclef 2020: An approach for visual question answering using vgg-lstm for different datasets, in CLEF, 2020."},{"key":"CIT0026","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00637"},{"key":"CIT0027","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.11"},{"key":"CIT0028","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"CIT0029","doi-asserted-by":"publisher","DOI":"10.17265\/2159-5313\/2016.09.003"},{"key":"CIT0030","unstructured":"Ren, M., R. Kiros, and R. S. Zemel. Image question answering: A visual semantic embedding model and a new dataset, ArXiv, vol. abs\/1505.02074, 2015."},{"key":"CIT0031","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-006-5833-1"},{"key":"CIT0032","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2017.8019436"},{"key":"CIT0033","unstructured":"Sarrouti, M. Nlm at vqa-med 2020: Visual question answering and generation in the medical domain, in CLEF, 2020."},{"key":"CIT0034","unstructured":"Shen, K., and M. Kejriwal. A data-driven study of commonsense knowledge using the conceptnet knowledge base, ArXiv, vol. abs\/2011.14084, 2020."},{"key":"CIT0035","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.499"},{"key":"CIT0036","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btx238"},{"key":"CIT0037","unstructured":"Tan, M., and Q. V. Le. Efficientnet: Rethinking model scaling for convolutional neural networks, ArXiv, vol. abs\/1905.11946, 2019."},{"key":"CIT0038","doi-asserted-by":"crossref","unstructured":"Teney, D., P. Anderson, X. He, and A. van den Hengel. 2018. Tips and tricks for visual question answering: Learnings from the 2017 challenge. 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 4223\u201332.","DOI":"10.1109\/CVPR.2018.00444"},{"key":"CIT0039","unstructured":"Torfi, A., R. A. Shirvani, Y. Keneshloo, N. Tavvaf, and E. A. Fox. Natural language processing advancements by deep learning: A survey. ArXiv, vol. abs\/2003.01200. 2020."},{"key":"CIT0040","unstructured":"Verma, H., and S. Ramachandran. Harendrakv at vqa-med 2020: Sequential vqa with attention for medical visual question answering, in CLEF, 2020."},{"key":"CIT0041","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/179"},{"key":"CIT0042","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2754246"},{"key":"CIT0043","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2017.05.001"},{"key":"CIT0044","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.500"},{"key":"CIT0045","doi-asserted-by":"publisher","DOI":"10.3115\/981732.981751"},{"key":"CIT0046","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00644"},{"key":"CIT0047","unstructured":"Zhou, B., Y. Tian, S. Sukhbaatar, A. D. Szlam, and R. Fergus. Simple baseline for visual question answering, ArXiv, vol. abs\/1512.02167, 2015."},{"key":"CIT0048","unstructured":"Zhu, Y., C. Zhang, C. R\u2019e, and L. Fei-Fei. Building a large-scale multimodal knowledge base system for answering visual queries, arXiv: Computer Vision and Pattern Recognition, 2015."}],"container-title":["Cybernetics and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/01969722.2021.2018543","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T12:37:21Z","timestamp":1651063041000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/01969722.2021.2018543"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,28]]},"references-count":48,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2022,7,4]]}},"alternative-id":["10.1080\/01969722.2021.2018543"],"URL":"https:\/\/doi.org\/10.1080\/01969722.2021.2018543","relation":{},"ISSN":["0196-9722","1087-6553"],"issn-type":[{"value":"0196-9722","type":"print"},{"value":"1087-6553","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,12,28]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=ucbs20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=ucbs20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2021-12-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}