{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:19:22Z","timestamp":1740100762439,"version":"3.37.3"},"reference-count":71,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,19]],"date-time":"2022-05-19T00:00:00Z","timestamp":1652918400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,19]],"date-time":"2022-05-19T00:00:00Z","timestamp":1652918400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,19]]},"DOI":"10.1109\/eit53891.2022.9813988","type":"proceedings-article","created":{"date-parts":[[2022,7,7]],"date-time":"2022-07-07T19:32:19Z","timestamp":1657222339000},"page":"139-146","source":"Crossref","is-referenced-by-count":4,"title":["Recent, Rapid Advancement in Visual Question Answering: a Review"],"prefix":"10.1109","author":[{"given":"Venkat","family":"Kodali","sequence":"first","affiliation":[{"name":"University of Arkansas at Little Rock,Department of Information Science,Little Rock,USA"}]},{"given":"Daniel","family":"Berleant","sequence":"additional","affiliation":[{"name":"University of Arkansas at Little Rock,Department of Information Science,Little Rock,USA"}]}],"member":"263","reference":[{"key":"ref71","article-title":"DocVQ: a dataset for VQA on document images","author":"mathew","year":"2021","journal-title":"WACV - IEEE Winter Conf on Applications of Computer Vision"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00280"},{"article-title":"An attention based convolutional neural network for visual question answering","year":"2015","author":"chen","key":"ref39"},{"article-title":"Fact-based visual question answering","year":"2016","author":"wang","key":"ref38"},{"year":"0","key":"ref33"},{"year":"2007","key":"ref32"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.540"},{"article-title":"Microsoft COCO: common objects in context","year":"2015","author":"lin","key":"ref30"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1044"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.10"},{"year":"0","key":"ref35"},{"journal-title":"Radiology Data from The Cancer Genome Atlas Lung Adenocarcinoma [TCGA-LUAD] collection The Cancer Imaging Archive","year":"2016","author":"brad","key":"ref34"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_10"},{"article-title":"Co-attending free-form regions and detections with multimodal multiplicative feature embedding for visual question answering","year":"2018","author":"lu","key":"ref62"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01258-8_2"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00205"},{"article-title":"CheXNet: radiologist-level pneumonia detection on chest X-rays with deep learning","year":"2010","author":"rajpurkar","key":"ref28"},{"article-title":"Overview of the VQA-Med task at ImageCLEF 2020: visual question answering and generation in the medical 
domain","year":"2020","author":"yu","key":"ref64"},{"year":"2010","key":"ref27"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.63"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01251"},{"article-title":"A multi-world approach to question answering about real-world scenes based on uncertain input","year":"2014","author":"malinowski","key":"ref29"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107538"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.353"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00864"},{"article-title":"AI, native supercomputing and the revival of Moore's law","year":"2020","author":"lu","key":"ref2"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-020-09832-7"},{"article-title":"ImageNet classification with deep convolutional neural networks","year":"2012","author":"krizhevsky","key":"ref20"},{"year":"2018","key":"ref22","article-title":"ImageCLEFF\/LifeCLEF - multimedia retrieval in CLEF"},{"article-title":"Going deeper with convolutions","year":"2014","author":"szegedy","key":"ref21"},{"article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","year":"2019","author":"devlin","key":"ref24"},{"year":"2021","key":"ref23","article-title":"Visual question answering in the medical domain"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1150"},{"year":"2019","key":"ref25","article-title":"One hot encoding"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00971"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2019.115648"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2817340"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_29"},{"article-title":"Speech-based visual question answering","year":"2017","author":"zhang","key":"ref57"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.446"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.344"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.145"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1167\/16.12.332"},{"article-title":"Dual attention network for visual question answering","year":"2016","author":"xu","key":"ref52"},{"key":"ref10","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc of the 31st Int Conf on Neural Information Processing Systems (NIPS'17)"},{"article-title":"Zhejiang University at ImageCLEF 2019 visual question answering in the medical domain","year":"2019","author":"yan","key":"ref11"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.499"},{"article-title":"NLM at ImageCLEF 2018 visual question answering in the medical domain","year":"2018","author":"abacha","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s13042-010-0001-0"},{"article-title":"Natural language engineering","year":"2017","author":"church","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"article-title":"Densely connected convolutional networks","year":"2016","author":"huang","key":"ref17"},{"article-title":"Very deep convolutional networks for large-scale image 
recognition","year":"2015","author":"simonyan","key":"ref18"},{"article-title":"Deep residual learning for image recognition","year":"2015","author":"he","key":"ref19"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"article-title":"A massive 8-year effort finds that much cancer research can't be replicated","year":"2020","author":"haelle","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1422953112"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.538"},{"article-title":"An image is worth 16x16 words: transformers for image recognition at scale","year":"2021","author":"dosovitskiy","key":"ref7"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01081"},{"key":"ref9","article-title":"Deep learning for visual question answering","author":"singh","year":"2016","journal-title":"Facebook AI Research"},{"article-title":"A simple and performant baseline for vision and language","year":"2019","author":"li","key":"ref46"},{"article-title":"Vl-bert: Pre-training of generic visual-linguistic representations","year":"2019","author":"su","key":"ref45"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01028"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01041"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00644"},{"article-title":"A focused dynamic attention model for visual question answering","year":"2016","author":"ilievski","key":"ref41"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1514"},{"article-title":"Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks","year":"2019","author":"lu","key":"ref43"}],"event":{"name":"2022 IEEE International Conference on Electro Information Technology (eIT)","start":{"date-parts":[[2022,5,19]]},"location":"Mankato, MN, USA","end":{"date-parts":[[2022,5,21]]}},"container-title":["2022 IEEE International Conference on Electro Information Technology (eIT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9813697\/9813750\/09813988.pdf?arnumber=9813988","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,5]],"date-time":"2022-08-05T00:45:16Z","timestamp":1659660316000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9813988\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,19]]},"references-count":71,"URL":"https:\/\/doi.org\/10.1109\/eit53891.2022.9813988","relation":{},"subject":[],"published":{"date-parts":[[2022,5,19]]}}}