{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,12,15]],"date-time":"2023-12-15T00:28:51Z","timestamp":1702600131352},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2023,6,10]],"date-time":"2023-06-10T00:00:00Z","timestamp":1686355200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,6,10]],"date-time":"2023-06-10T00:00:00Z","timestamp":1686355200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1007\/s13042-023-01876-9","type":"journal-article","created":{"date-parts":[[2023,6,10]],"date-time":"2023-06-10T08:02:46Z","timestamp":1686384166000},"page":"3979-3991","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Brain-inspired learning to deeper inductive reasoning for video captioning"],"prefix":"10.1007","volume":"14","author":[{"given":"Xiao","family":"Yao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Feiyang","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Min","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peipei","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,6,10]]},"reference":[{"key":"1876_CR1","doi-asserted-by":"crossref","unstructured":"Aafaq N, Akhtar N, Liu W, et\u00a0al (2019) Spatio-temporal dynamics and semantic attribute enriched visual encoding for video captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 12,487\u201312,496","DOI":"10.1109\/CVPR.2019.01277"},{"key":"1876_CR2","unstructured":"Banerjee S, Lavie A (2005) Meteor: An automatic metric for mt evaluation with improved correlation with human judgments. In: Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization, pp 65\u201372"},{"issue":"2","key":"1876_CR3","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1080\/00221325.1958.10532420","volume":"93","author":"HR Burke","year":"1958","unstructured":"Burke HR (1958) Raven\u2019s progressive matrices: a review and critical evaluation. J Genetic Psychol 93(2):199\u2013228","journal-title":"J Genetic Psychol"},{"key":"1876_CR4","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A (2017) Quo vadis, action recognition? a new model and the kinetics dataset. In: proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 6299\u20136308","DOI":"10.1109\/CVPR.2017.502"},{"key":"1876_CR5","unstructured":"Chen D, Dolan WB (2011) Collecting highly parallel data for paraphrase evaluation. In: Proceedings of the 49th annual meeting of the association for computational linguistics: human language technologies, pp 190\u2013200"},{"key":"1876_CR6","doi-asserted-by":"crossref","unstructured":"Chen J, Pan Y, Li Y, et\u00a0al (2019) Temporal deformable convolutional encoder-decoder networks for video captioning. In: Proceedings of the AAAI conference on artificial intelligence, pp 8167\u20138174","DOI":"10.1609\/aaai.v33i01.33018167"},{"key":"1876_CR7","doi-asserted-by":"crossref","unstructured":"Chen S, Jiang YG (2019) Motion guided spatial attention for video captioning. In: Proceedings of the AAAI conference on artificial intelligence, pp 8191\u20138198","DOI":"10.1609\/aaai.v33i01.33018191"},{"key":"1876_CR8","doi-asserted-by":"crossref","unstructured":"Chen S, Chen J, Jin Q, et\u00a0al (2017) Video captioning with guidance of multimodal latent topics. In: Proceedings of the 25th ACM international conference on Multimedia, pp 1838\u20131846","DOI":"10.1145\/3123266.3123420"},{"key":"1876_CR9","doi-asserted-by":"crossref","unstructured":"Chen Y, Wang S, Zhang W, et\u00a0al (2018) Less is more: Picking informative frames for video captioning. In: Proceedings of the European conference on computer vision (ECCV), pp 358\u2013373","DOI":"10.1007\/978-3-030-01261-8_22"},{"issue":"1","key":"1876_CR10","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/j.learninstruc.2006.11.009","volume":"17","author":"C Christou","year":"2007","unstructured":"Christou C, Papageorgiou E (2007) A framework of mathematics inductive reasoning. Learn Instruct 17(1):55\u201366","journal-title":"Learn Instruct"},{"key":"1876_CR11","doi-asserted-by":"crossref","unstructured":"Donahue J, Hendricks LA, Guadarrama S, et\u00a0al (2015) Long-term recurrent convolutional networks for visual recognition and description. In: 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","DOI":"10.1109\/CVPR.2015.7298878"},{"issue":"5","key":"1876_CR12","first-page":"1112","volume":"42","author":"L Gao","year":"2019","unstructured":"Gao L, Li X, Song J et al (2019) Hierarchical lstms with adaptive attention for visual captioning. IEEE Trans Pattern Anal Mach Intell 42(5):1112\u20131131","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1876_CR13","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"key":"1876_CR14","doi-asserted-by":"crossref","unstructured":"Hayes BK, Heit E (2018) Inductive reasoning 2.0. Wiley Interdisciplinary Reviews: Cognitive Science 9(3):e1459","DOI":"10.1002\/wcs.1459"},{"key":"1876_CR15","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, et\u00a0al (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"1876_CR16","doi-asserted-by":"crossref","unstructured":"He K, Fan H, Wu Y, et\u00a0al (2020) Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9729\u20139738","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"1876_CR17","doi-asserted-by":"crossref","unstructured":"Heit E (1999) A bayesian analysis of some forms of inductive reasoning. rational models of cognition","DOI":"10.1093\/oso\/9780198524151.003.0012"},{"key":"1876_CR18","doi-asserted-by":"crossref","unstructured":"Hou J, Wu X, Zhao W, et\u00a0al (2019) Joint syntax representation learning and visual cue translation for video captioning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 8918\u20138927","DOI":"10.1109\/ICCV.2019.00901"},{"key":"1876_CR19","unstructured":"Kingma DP, Welling M (2013) Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114"},{"key":"1876_CR20","doi-asserted-by":"crossref","unstructured":"Krishna R, Hata K, Ren F, et\u00a0al (2017) Dense-captioning events in videos. In: 2017 IEEE International Conference on Computer Vision (ICCV)","DOI":"10.1109\/ICCV.2017.83"},{"issue":"2","key":"1876_CR21","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1037\/xge0000496","volume":"148","author":"JC Lee","year":"2019","unstructured":"Lee JC, Lovibond PF, Hayes BK et al (2019) Negative evidence and inductive reasoning in generalization of associative learning. J Exp Psychol 148(2):289","journal-title":"J Exp Psychol"},{"key":"1876_CR22","unstructured":"Lin CY (2004) Rouge: A package for automatic evaluation of summaries. In: Text summarization branches out, pp 74\u201381"},{"key":"1876_CR23","doi-asserted-by":"crossref","unstructured":"Lu J, Xiong C, Parikh D, et\u00a0al (2017) Knowing when to look: Adaptive attention via a visual sentinel for image captioning. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 375\u2013383","DOI":"10.1109\/CVPR.2017.345"},{"key":"1876_CR24","doi-asserted-by":"crossref","unstructured":"Mahon L, Giunchiglia E, Li B, et\u00a0al (2020) Knowledge graph extraction from videos. In: 2020 19th IEEE International conference on machine learning and applications (ICMLA), IEEE, pp 25\u201332","DOI":"10.1109\/ICMLA51294.2020.00014"},{"key":"1876_CR25","unstructured":"Mikolov T, Chen K, Corrado G, et\u00a0al (2013) Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781"},{"key":"1876_CR26","doi-asserted-by":"crossref","unstructured":"Mun J, Yang L, Ren Z, et\u00a0al (2019) Streamlined dense video captioning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6588\u20136597","DOI":"10.1109\/CVPR.2019.00675"},{"key":"1876_CR27","doi-asserted-by":"crossref","unstructured":"Pan B, Cai H, Huang DA, et\u00a0al (2020) Spatio-temporal graph for video captioning with knowledge distillation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10,870\u201310,879","DOI":"10.1109\/CVPR42600.2020.01088"},{"key":"1876_CR28","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, et\u00a0al (2002) Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting of the association for computational linguistics, pp 311\u2013318","DOI":"10.3115\/1073083.1073135"},{"key":"1876_CR29","unstructured":"Rasmussen D (2010) A neural modelling approach to investigating general intelligence. Master\u2019s thesis, University of Waterloo"},{"issue":"1","key":"1876_CR30","doi-asserted-by":"publisher","first-page":"140","DOI":"10.1111\/j.1756-8765.2010.01127.x","volume":"3","author":"D Rasmussen","year":"2011","unstructured":"Rasmussen D, Eliasmith C (2011) A neural model of rule generation in inductive reasoning. Topics Cognit Sci 3(1):140\u2013153","journal-title":"Topics Cognit Sci"},{"key":"1876_CR31","unstructured":"Sohn K, Yan X, Lee H, et\u00a0al (2015) Learning structured output representation using deep conditional generative models. In: International conference on neural information processing systems"},{"issue":"8","key":"1876_CR32","doi-asserted-by":"publisher","first-page":"5959","DOI":"10.1007\/s00521-021-06733-w","volume":"34","author":"P Tang","year":"2022","unstructured":"Tang P, Tan Y, Luo W (2022) Visual and language semantic hybrid enhancement and complementary for video description. Neural Comput Appl 34(8):5959\u20135977","journal-title":"Neural Comput Appl"},{"key":"1876_CR33","doi-asserted-by":"crossref","unstructured":"Vedantam R, Lawrence\u00a0Zitnick C, Parikh D (2015) Cider: Consensus-based image description evaluation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4566\u20134575","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"1876_CR34","doi-asserted-by":"crossref","unstructured":"Venugopalan S, Rohrbach M, Donahue J, et\u00a0al (2015) Sequence to sequence-video to text. In: Proceedings of the IEEE international conference on computer vision, pp 4534\u20134542","DOI":"10.1109\/ICCV.2015.515"},{"issue":"5","key":"1876_CR35","doi-asserted-by":"publisher","first-page":"1890","DOI":"10.1109\/TCSVT.2020.3014606","volume":"31","author":"T Wang","year":"2020","unstructured":"Wang T, Zheng H, Yu M et al (2020) Event-centric hierarchical representation for dense video captioning. IEEE Trans Circ Syst Video Technol 31(5):1890\u20131900","journal-title":"IEEE Trans Circ Syst Video Technol"},{"key":"1876_CR36","doi-asserted-by":"crossref","unstructured":"Xu J, Mei T, Yao T, et\u00a0al (2016) Msr-vtt: A large video description dataset for bridging video and language. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5288\u20135296","DOI":"10.1109\/CVPR.2016.571"},{"issue":"1","key":"1876_CR37","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1109\/TMM.2019.2924576","volume":"22","author":"C Yan","year":"2019","unstructured":"Yan C, Tu Y, Wang X et al (2019) Stat: Spatial-temporal attention mechanism for video captioning. IEEE Trans Multimedia 22(1):229\u2013241","journal-title":"IEEE Trans Multimedia"},{"issue":"1","key":"1876_CR38","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1109\/TCSVT.2021.3067449","volume":"32","author":"C Yan","year":"2021","unstructured":"Yan C, Hao Y, Li L et al (2021) Task-adaptive attention for image captioning. IEEE Trans Circ Syst Video Technol 32(1):43\u201351","journal-title":"IEEE Trans Circ Syst Video Technol"},{"issue":"11","key":"1876_CR39","doi-asserted-by":"publisher","first-page":"5552","DOI":"10.1109\/TIP.2019.2916757","volume":"28","author":"B Zhao","year":"2019","unstructured":"Zhao B, Li X, Lu X (2019) Cam-rnn: Co-attention model based rnn for video captioning. IEEE Trans Image Process 28(11):5552\u20135565","journal-title":"IEEE Trans Image Process"},{"key":"1876_CR40","doi-asserted-by":"crossref","unstructured":"Zheng Q, Wang C, Tao D (2020) Syntax-aware action targeting for video captioning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13,096\u201313,105","DOI":"10.1109\/CVPR42600.2020.01311"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-023-01876-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-023-01876-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-023-01876-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,14]],"date-time":"2023-12-14T22:34:52Z","timestamp":1702593292000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-023-01876-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,10]]},"references-count":40,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2023,11]]}},"alternative-id":["1876"],"URL":"https:\/\/doi.org\/10.1007\/s13042-023-01876-9","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6,10]]},"assertion":[{"value":"5 October 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 May 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 June 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}