{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T17:21:05Z","timestamp":1742923265266,"version":"3.40.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030625788"},{"type":"electronic","value":"9783030625795"}],"license":[{"start":{"date-parts":[[2020,11,3]],"date-time":"2020-11-03T00:00:00Z","timestamp":1604361600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,11,3]],"date-time":"2020-11-03T00:00:00Z","timestamp":1604361600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-62579-5_18","type":"book-chapter","created":{"date-parts":[[2020,11,2]],"date-time":"2020-11-02T06:02:40Z","timestamp":1604296960000},"page":"256-273","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Towards Fine-Tuning of VQA Models in Public Datasets"],"prefix":"10.1007","author":[{"given":"Miguel E.","family":"Ortiz","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Luis M.","family":"Bergasa","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roberto","family":"Arroyo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sergio","family":"\u00c1lvarez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aitor","family":"Aller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,3]]},"reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Antol, S., Agrawal, A., Lu, J., Mitchell, M., Batra, D., Zitnick, C.L., Parikh, D.: VQA: visual question answering. In: International Conference on Computer Vision (ICCV), pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., Smola, A.: Stacked attention networks for image question answering. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 21\u201329 (2016)","DOI":"10.1109\/CVPR.2016.10"},{"key":"18_CR3","unstructured":"Malinowski, M., Fritz, M.: A multi-world approach to question answering about real-world scenes based on uncertain input. In: Conference on Neural Information Processing Systems (NeurIPS), pp. 1682\u20131690 (2014)"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Teney, D., Anderson, P., He, X., Van Den Hengel, A.: Tips and tricks for visual question answering: learnings from the 2017 challenge. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4223\u20134232 (2018)","DOI":"10.1109\/CVPR.2018.00444"},{"key":"18_CR5","unstructured":"Yi, K., Wu, J., Gan, C., Torralba, A., Kohli, P., Tenenbaum, J.: Neural-symbolic VQA: disentangling reasoning from vision and language understanding. In: Conference on Neural Information Processing Systems (NeurIPS), pp. 1031\u20131042 (2018)"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Johnson, J., Hariharan, B., van der Maaten, L., Fei-Fei, L., Lawrence Zitnick, C., Girshick, R.: CLEVR: a diagnostic dataset for compositional language and elementary visual reasoning. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2901\u20132910 (2017)","DOI":"10.1109\/CVPR.2017.215"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Liang, J., Jiang, L., Cao, L., Li, L.-J., Hauptmann, A.G.: Focal visual-text attention for visual question answering. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6135\u20136143 (2018)","DOI":"10.1109\/CVPR.2018.00642"},{"key":"18_CR8","doi-asserted-by":"crossref","unstructured":"Anderson, P., He, X., Buehler, C., Teney, D., Johnson, M., Gould, S., Zhang, L.: Bottom-up and top-down attention for image captioning and visual question answering. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6077\u20136086 (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"18_CR9","unstructured":"Jiang, Y., Natarajan, V., Chen, X., Rohrbach, M., Batra, D., Parikh, D.: Pythia v0.1: The Winning Entry to the VQA Challenge 2018. arXiv preprint \narXiv:1807.09956\n\n (2018)"},{"key":"18_CR10","doi-asserted-by":"crossref","unstructured":"Wu, C., Liu, J., Wang, X., Li, R.: Differential networks for visual question answering. In: AAAI Conference on Artificial Intelligence (AAAI), vol. 33, pp. 8997\u20139004 (2019)","DOI":"10.1609\/aaai.v33i01.33018997"},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"Singh, A., Natarajan, V., Shah, M., Jiang, Y., Chen, X., Batra, D., Parikh, D., Rohrbach, M.: Towards VQA models that can read. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8317\u20138326 (2019)","DOI":"10.1109\/CVPR.2019.00851"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Lin, T., Maire, M., Belongie, S.J., Bourdev, L.D., Girshick, R.B., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft COCO: Common Objects in Context. arXiv preprint \narXiv:1405.0312\n\n (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"18_CR13","unstructured":"VQA-v2 (Online) (2020). \nhttps:\/\/visualqa.org\/download.html"},{"key":"18_CR14","unstructured":"PyTorch (Online) (2020). \nhttps:\/\/pytorch.org\/"},{"key":"18_CR15","unstructured":"FAIR Framework (Online) (2020). \nhttps:\/\/github.com\/facebookresearch\/mmf"},{"key":"18_CR16","unstructured":"Detectron (Online) (2020). \nhttps:\/\/github.com\/facebookresearch\/Detectron"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Doll\u00e1r, P., Tu, Z., He, K.: Aggregated residual transformations for deep neural networks. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1492\u20131500 (2017)","DOI":"10.1109\/CVPR.2017.634"},{"key":"18_CR18","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.D.: GloVe: global vectors for word representation. In: Conference on Empirical Methods in Natural Language Processing (EMNLP), vol. 14, pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"18_CR20","unstructured":"Kingma, D.P., Ba, J.: Adam: A Method for Stochastic Optimization. arXiv preprint \narXiv:1412.6980\n\n (2014)"}],"container-title":["Advances in Intelligent Systems and Computing","Advances in Physical Agents II"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-62579-5_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,2]],"date-time":"2020-11-02T06:10:15Z","timestamp":1604297415000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-62579-5_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11,3]]},"ISBN":["9783030625788","9783030625795"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-62579-5_18","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"type":"print","value":"2194-5357"},{"type":"electronic","value":"2194-5365"}],"subject":[],"published":{"date-parts":[[2020,11,3]]},"assertion":[{"value":"3 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"WAF","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Workshop of Physical Agents","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Alcal\u00e1 de Henares, Madrid","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 November 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 November 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"waf2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.robesafe.uah.es\/waf2020","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}