{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T22:00:53Z","timestamp":1763071253795,"version":"3.37.3"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T00:00:00Z","timestamp":1673308800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T00:00:00Z","timestamp":1673308800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach. Intell. Res."],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1007\/s11633-022-1360-1","type":"journal-article","created":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T17:04:43Z","timestamp":1673370283000},"page":"79-91","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Visual Superordinate Abstraction for Robust Concept Learning"],"prefix":"10.1007","volume":"20","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4351-9537","authenticated-orcid":false,"given":"Qi","family":"Zheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chao-Yue","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dadong","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7225-5449","authenticated-orcid":false,"given":"Da-Cheng","family":"Tao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,1,10]]},"reference":[{"key":"1360_CR1","doi-asserted-by":"publisher","unstructured":"B. Inhelder, J. Piaget. The early growth of logic in the child: Classification and seriation. Routledge, vol. 83, 2013. DOI: https:\/\/doi.org\/10.4324\/9781315009667.","DOI":"10.4324\/9781315009667"},{"key":"1360_CR2","doi-asserted-by":"publisher","unstructured":"S. Antol, A. Agrawal, J. Lu, M. Mitchell, D. Batra, C. L. Zitnick, D. Parikh. VQA: Visual question answering. In Proceedings of IEEE International Conference on Computer Vision, Santiago, Chile, pp. 2425\u20132433, 2015. DOI: https:\/\/doi.org\/10.1109\/ICCV.2015.279","DOI":"10.1109\/ICCV.2015.279"},{"key":"1360_CR3","doi-asserted-by":"publisher","unstructured":"R. Zellers, Y. Bisk, A. Farhadi, Y. Choi. From recognition to cognition: Visual commonsense reasoning. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Long Beach, USA, pp. 6720\u20136731, 2019. DOI: https:\/\/doi.org\/10.1109\/CVPR.2019.00688","DOI":"10.1109\/CVPR.2019.00688"},{"key":"1360_CR4","doi-asserted-by":"publisher","unstructured":"P. Anderson, Q. Wu, D. Teney, J. Bruce, M. Johnson, N. S\u00fcnderhauf, I. Reid, S. Gould, A. Van Den Hengel. Vision-and-language navigation: Interpreting visually-grounded navigation instructions in real environments. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Salt Lake City, USA, pp. 3674\u20133683, 2018. DOI: https:\/\/doi.org\/10.1109\/CVPR.2018.00387","DOI":"10.1109\/CVPR.2018.00387"},{"key":"1360_CR5","doi-asserted-by":"publisher","unstructured":"D. Mascharka, P. Tran, R. Soklaski, A. Majumdar. Transparency by design: Closing the gap between performance and interpretability in visual reasoning. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Salt Lake City, USA, pp. 4942\u20134950, 2018. DOI: https:\/\/doi.org\/10.1109\/CVPR.2018.00519","DOI":"10.1109\/CVPR.2018.00519"},{"key":"1360_CR6","unstructured":"K. Yi, J. Wu, C. Gan, A. Torralba, P. Kohli, J. B. Tenenbaum. Neural-symbolic VQA: Disentangling reasoning from vision and language understanding. In Proceedings of Advances in Neural Information Processing Systems, Montr\u00e9al, Canada, vol. 31, 2018."},{"key":"1360_CR7","unstructured":"J. Mao, C. Gan, P. Kohli, J. B. Tenenbaum, J. Wu. The neuro-symbolic concept learner: Interpreting scenes, words, and sentences from natural supervision. In Proceedings of International Conference on Learning Representations, New Orleans, USA, 2019."},{"key":"1360_CR8","doi-asserted-by":"publisher","unstructured":"J. Johnson, B. Hariharan, L. Van Der Maaten, L. Fei-Fei, C. L. Zitnick, R. Girshick. CLEVR: A diagnostic dataset for compositional language and elementary visual reasoning. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Hawaii, USA, pp. 2901\u20132910, 2017. DOI: https:\/\/doi.org\/10.1109\/CVPR.2017.215","DOI":"10.1109\/CVPR.2017.215"},{"key":"1360_CR9","unstructured":"V. Marois, T. Jayram, V. Albouy, T. Kornuta, Y. Bouhadjar, A. S. Ozcan. On transfer learning using a mac model variant. In Proceedings of Workshop of Advances in Neural Information Processing Systems, Montr\u00e9al, Canada, 2018."},{"key":"1360_CR10","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/1602.001.0001","volume-title":"The Big Book of Concepts","author":"G Murphy","year":"2004","unstructured":"G. Murphy. The Big Book of Concepts. Cambridge, USA: MIT press, 2004. DOI: https:\/\/doi.org\/10.7551\/mitpress\/1602.001.0001"},{"key":"1360_CR11","doi-asserted-by":"publisher","unstructured":"T. K. Landauer, S. T. Dumais. A solution to plato\u2019s problem: The latent semantic analysis theory of acquisition, induction, and representation of knowledge. Psychological review, vol. 104, no. 2, Article number 211, 1997. DOI: https:\/\/doi.org\/10.1037\/0033-295X.104.2.211.","DOI":"10.1037\/0033-295X.104.2.211"},{"issue":"2","key":"1360_CR12","doi-asserted-by":"publisher","first-page":"203","DOI":"10.3758\/BF03204766","volume":"28","author":"K Lund","year":"1996","unstructured":"K. Lund, C. Burgess. Producing high-dimensional semantic spaces from lexical co-occurrence. Behavior research methods, instruments & computers, vol. 28, no. 2, pp. 203\u2013208, 1996. DOI: https:\/\/doi.org\/10.3758\/BF03204766.","journal-title":"Behavior research methods, instruments & computers"},{"key":"1360_CR13","doi-asserted-by":"publisher","unstructured":"B. M. Lake, G. L. Murphy. Word meaning in minds and machines. Psychological Review, to be published. DOI: https:\/\/doi.org\/10.1037\/rev0000297.","DOI":"10.1037\/rev0000297"},{"issue":"6022","key":"1360_CR14","doi-asserted-by":"publisher","first-page":"1279","DOI":"10.1126\/science.1192788","volume":"331","author":"J B Tenenbaum","year":"2011","unstructured":"J. B. Tenenbaum, C. Kemp, T. L. Griffiths, N. D. Goodman. How to grow a mind: Statistics, structure, and abstraction. Science, vol. 331, no. 6022, pp. 1279\u20131285, 2011. DOI: https:\/\/doi.org\/10.1126\/science.1192788.","journal-title":"Science"},{"issue":"3","key":"1360_CR15","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1016\/0010-0285(76)90013-X","volume":"8","author":"E Rosch","year":"1976","unstructured":"E. Rosch, C. B. Mervis, W. D. Gray, D. M. Johnson, P. Boyes-Braem. Basic objects in natural categories. Cognitive psychology, vol. 8, no. 3, pp. 382\u2013439, 1976. DOI: https:\/\/doi.org\/10.1016\/0010-0285(76)90013-X.","journal-title":"Cognitive psychology"},{"issue":"3","key":"1360_CR16","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1016\/0010-0285(91)90016-H","volume":"23","author":"J W Tanaka","year":"1991","unstructured":"J. W. Tanaka, M. Taylor. Object categories and expertise: Is the basic level in the eye of the beholder? Cognitive psychology, vol. 23, no. 3, pp. 457\u2013482, 1991. DOI: https:\/\/doi.org\/10.1016\/0010-0285(91)90016-H.","journal-title":"Cognitive psychology"},{"key":"1360_CR17","unstructured":"C. Han, J. Mao, C. Gan, J. B. Tenenbaum, J. Wu. Visual concept-metaconcept learning. In Proceedings of Advances in Neural Information Processing Systems, Vancouver, Canada, 2019."},{"issue":"5","key":"1360_CR18","doi-asserted-by":"publisher","first-page":"563","DOI":"10.1007\/s11633-019-1177-8","volume":"16","author":"A Li","year":"2019","unstructured":"A. Li, K. Zhang, L. Wang. Zero-shot fine-grained classification by deep feature learning with semantics. International Journal of Automation and Computing, vol. 16, no. 5, pp. 563\u2013574, 2019. DOI: https:\/\/doi.org\/10.1007\/s11633-019-1177-8.","journal-title":"International Journal of Automation and Computing"},{"issue":"2","key":"1360_CR19","doi-asserted-by":"publisher","first-page":"204","DOI":"10.1007\/s11633-020-1270-z","volume":"18","author":"W Zhu","year":"2021","unstructured":"W. Zhu, W. Sun, X. Min, G. Zhai, X. Yang. Structured computational modeling of human visual system for no-reference image quality assessment. International Journal of Automation and Computing, vol. 18, no. 2, pp. 204\u2013218, 2021. DOI: https:\/\/doi.org\/10.1007\/s11633-020-1270-z.","journal-title":"International Journal of Automation and Computing"},{"key":"1360_CR20","doi-asserted-by":"publisher","unstructured":"J. Johnson, B. Hariharan, L. Van Der Maaten, J. Hoffman, L. Fei-Fei, C. Lawrence Zitnick, R. Girshick. Inferring and executing programs for visual reasoning. In Proceedings of IEEE International Conference on Computer Vision, Venice, Italy, pp. 2989\u20132998, 2017. DOI: https:\/\/doi.org\/10.1109\/ICCV.2017.325","DOI":"10.1109\/ICCV.2017.325"},{"key":"1360_CR21","doi-asserted-by":"publisher","unstructured":"K. He, X. Zhang, S. Ren, J. Sun. Deep residual learning for image recognition. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Las Vegas, USA, pp. 770\u2013778, 2016. DOI: https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"1360_CR22","doi-asserted-by":"publisher","unstructured":"R. Hu, J. Andreas, M. Rohrbach, T. Darrell, K. Saenko. Learning to reason: End-to-end module networks for visual question answering. In Proceedings of IEEE International Conference on Computer Vision, Venice, Italy, pp. 804\u2013813, 2017. DOI: https:\/\/doi.org\/10.1109\/ICCV.2017.93","DOI":"10.1109\/ICCV.2017.93"},{"key":"1360_CR23","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/978-3-030-01234-2_4","volume-title":"Proceedings of the European Conference on Computer Vision","author":"R Hu","year":"2018","unstructured":"R. Hu, J. Andreas, T. Darrell, K. Saenko. Explainable neural computation via stack neural module networks. In Proceedings of the European Conference on Computer Vision, Springer, Munich, Germany, pp. 53\u201369, 2018. DOI: https:\/\/doi.org\/10.1007\/978-3-030-01234-2_4"},{"key":"1360_CR24","unstructured":"Z. Chen, J. Mao, J. Wu, K. Wong, J. Tenenbaum, C. Gan. Grounding physical concepts of objects and events through dynamic visual reasoning. In Proceedings of International Conference on Learning Representations, Vienna, Austria, 2021."},{"key":"1360_CR25","doi-asserted-by":"publisher","unstructured":"Q. Li, S. Huang, Y. Hong, S.-C. Zhu. A competence-aware curriculum for visual concepts learning via question answering. In Proceedings of the European Conference on Computer Vision, Springer, pp. 141\u2013157, 2020. DOI: https:\/\/doi.org\/10.1007\/978-3-030-58536-5_9","DOI":"10.1007\/978-3-030-58536-5_9"},{"key":"1360_CR26","doi-asserted-by":"publisher","unstructured":"E. Perez, F. Strub, H. De Vries, V. Dumoulin, A. Courville. Film: Visual reasoning with a general conditioning layer. In Proceedings of AAAI Conference on Artificial Intelligence, New Orleans, USA, pp. 3942\u20133951, 2018. DOI: https:\/\/doi.org\/10.1609\/aaai.v32i1.11671","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"1360_CR27","unstructured":"D. A. Hudson, C. D. Manning. Compositional attention networks for machine reasoning. In Proceedings of International Conference on Learning Representations, Vancouver, Canada, 2018."},{"key":"1360_CR28","doi-asserted-by":"publisher","unstructured":"Z. Wang, K. Wang, M. Yu, J. Xiong, W. Hwu, M. Hasegawa-Johnson, H. Shi. Interpretable visual reasoning via induced symbolic space. In Proceedings of IEEE International Conference on Computer Vision, Montr\u00e9al, Canada, pp. 1878\u20131887, 2021. DOI: https:\/\/doi.org\/10.1109\/ICCV48922.2021.00189","DOI":"10.1109\/ICCV48922.2021.00189"},{"key":"1360_CR29","doi-asserted-by":"publisher","unstructured":"A. Kamath, M. Singh, Y. LeCun, I. Misra, G. Synnaeve, N. Carion. Mdetr-modulated detection for end-to-end multimodal understanding. In Proceedings of IEEE International Conference on Computer Vision, Montr\u00e9al, Canada, pp. 1780\u20131790, 2021. DOI: https:\/\/doi.org\/10.1109\/ICCV48922.2021.00180","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"1360_CR30","doi-asserted-by":"publisher","first-page":"96","DOI":"10.1214\/09-SS057","volume":"3","author":"J Pearl","year":"2009","unstructured":"J. Pearl. Causal inference in statistics: An overview. Statistics Surveys, vol. 3, pp. 96\u2013146, 2009. DOI: https:\/\/doi.org\/10.1214\/09-SS057.","journal-title":"Statistics Surveys"},{"issue":"93","key":"1360_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3310\/hta19930","volume":"19","author":"G Dunn","year":"2015","unstructured":"G. Dunn, R. Emsley, H. Liu, S. Landau, J. Green, I. White, A. Pickles. Evaluation and validation of social and psychological markers in randomised trials of complex interventions in mental health: a methodological research programme. Health Technology Assessment, Winchester, England, vol. 19, no. 93, pp. 1\u2013115, 2015. DOI: https:\/\/doi.org\/10.3310\/hta19930.","journal-title":"Health Technology Assessment, Winchester, England"},{"issue":"3","key":"1360_CR32","doi-asserted-by":"publisher","first-page":"395","DOI":"10.2189\/asqu.53.3.395","volume":"53","author":"B G King","year":"2008","unstructured":"B. G. King. A political mediation model of corporate response to social movement activism. Administrative Science Quarterly, vol. 53, no. 3, pp. 395\u2013421, 2008. DOI: https:\/\/doi.org\/10.2189\/asqu.53.3.395.","journal-title":"Administrative Science Quarterly"},{"key":"1360_CR33","doi-asserted-by":"publisher","first-page":"593","DOI":"10.1146\/annurev.psych.58.110405.085542","volume":"58","author":"D P MacKinnon","year":"2007","unstructured":"D. P. MacKinnon, A. J. Fairchild, M. S. Fritz. Mediation analysis. Annual Review of Psychology, vol. 58, pp. 593\u2013614, 2007. DOI: https:\/\/doi.org\/10.1146\/annurev.psych.58.110405.085542.","journal-title":"Annual Review of Psychology"},{"issue":"5","key":"1360_CR34","doi-asserted-by":"publisher","first-page":"1511","DOI":"10.1093\/ije\/dyt127","volume":"42","author":"L Richiardi","year":"2013","unstructured":"L. Richiardi, R. Bellocco, D. Zugna. Mediation analysis in epidemiology: methods, interpretation and bias. International Journal of Epidemiology, vol. 42, no. 5, pp. 1511\u20131519, 2013. DOI: https:\/\/doi.org\/10.1093\/ije\/dyt127.","journal-title":"International Journal of Epidemiology"},{"key":"1360_CR35","unstructured":"S. Nair, Y. Zhu, S. Savarese, L. Fei-Fei. Causal induction from visual observations for goal directed tasks. [Online], Available: https:\/\/arxiv.org\/abs\/1910.01751."},{"key":"1360_CR36","doi-asserted-by":"publisher","unstructured":"Y. Niu, K. Tang, H. Zhang, Z. Lu, X.-S. Hua, J.-R. Wen. Counterfactual VQA: A cause-effect look at language bias. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Nashville, USA, pp. 12700\u201312710, 2021. DOI: https:\/\/doi.org\/10.1109\/CVPR46437.2021.01251.","DOI":"10.1109\/CVPR46437.2021.01251"},{"key":"1360_CR37","doi-asserted-by":"publisher","unstructured":"J. Qi, Y. Niu, J. Huang, H. Zhang. Two causal principles for improving visual dialog. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 10860\u201310869, 2020. DOI: https:\/\/doi.org\/10.1109\/CV-PR42600.2020.01087","DOI":"10.1109\/CV-PR42600.2020.01087"},{"key":"1360_CR38","doi-asserted-by":"publisher","unstructured":"T. Wang, J. Huang, H. Zhang, Q. Sun. Visual common-sense R-CNN. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 10760\u201310770, 2020. DOI: https:\/\/doi.org\/10.1109\/CVPR42600.2020.01077","DOI":"10.1109\/CVPR42600.2020.01077"},{"key":"1360_CR39","doi-asserted-by":"publisher","unstructured":"X. Yang, H. Zhang, J. Cai. Deconfounded image captioning: A causal retrospect. IEEE Transactions on Pattern Analysis and Machine Intelligence, to be published, 2022. DOI: https:\/\/doi.org\/10.1109\/TPAMI.2021.3121705.","DOI":"10.1109\/TPAMI.2021.3121705"},{"key":"1360_CR40","doi-asserted-by":"publisher","unstructured":"K. Tang, Y. Niu, J. Huang, J. Shi, H. Zhang. Unbiased scene graph generation from biased training. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 3716\u20133725, 2020. DOI: https:\/\/doi.org\/10.1109\/CVPR42600.2020.00377","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"1360_CR41","unstructured":"I. Loshchilov, F. Hutter. Decoupled weight decay regularization. In Proceedings of International Conference on Learning Representations, New Orleans, USA, 2019."}],"container-title":["Machine Intelligence Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-022-1360-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11633-022-1360-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-022-1360-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T17:42:16Z","timestamp":1673372536000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11633-022-1360-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,10]]},"references-count":41,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,2]]}},"alternative-id":["1360"],"URL":"https:\/\/doi.org\/10.1007\/s11633-022-1360-1","relation":{},"ISSN":["2731-538X","2731-5398"],"issn-type":[{"type":"print","value":"2731-538X"},{"type":"electronic","value":"2731-5398"}],"subject":[],"published":{"date-parts":[[2023,1,10]]},"assertion":[{"value":"29 May 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 July 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 January 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}