{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T01:43:43Z","timestamp":1778723023350,"version":"3.51.4"},"reference-count":147,"publisher":"Tsinghua University Press","issue":"1","funder":[{"DOI":"10.13039\/100010663","name":"European Research Council","doi-asserted-by":"publisher","award":["742870"],"award-info":[{"award-number":["742870"]}],"id":[{"id":"10.13039\/100010663","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001711","name":"Swiss National Science Foundation","doi-asserted-by":"publisher","award":["200021,192356"],"award-info":[{"award-number":["200021,192356"]}],"id":[{"id":"10.13039\/501100001711","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476143"],"award-info":[{"award-number":["62476143"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Comp. Visual. Med."],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.26599\/cvm.2025.9450460","type":"journal-article","created":{"date-parts":[[2025,2,25]],"date-time":"2025-02-25T18:46:39Z","timestamp":1740509199000},"page":"29-81","source":"Crossref","is-referenced-by-count":8,"title":["Mindstorms in Natural Language-Based Societies of Mind"],"prefix":"10.26599","volume":"11","author":[{"given":"Mingchen","family":"Zhuge","sequence":"first","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haozhe","family":"Liu","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Francesco","family":"Faccio","sequence":"additional","affiliation":[{"name":"Dalle Molle Institute for Artificial Intelligence Research,Lugano,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dylan R.","family":"Ashley","sequence":"additional","affiliation":[{"name":"Dalle Molle Institute for Artificial Intelligence Research,Lugano,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"R\u00f3bert","family":"Csord\u00e1s","sequence":"additional","affiliation":[{"name":"Stanford University,California,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anand","family":"Gopalakrishnan","sequence":"additional","affiliation":[{"name":"Dalle Molle Institute for Artificial Intelligence Research,Lugano,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abdullah","family":"Hamdi","sequence":"additional","affiliation":[{"name":"Oxford University,Oxford,UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hasan Abed Al Kader","family":"Hammoud","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vincent","family":"Herrmann","sequence":"additional","affiliation":[{"name":"Dalle Molle Institute for Artificial Intelligence Research,Lugano,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kazuki","family":"Irie","sequence":"additional","affiliation":[{"name":"Harvard University,Cambridge,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Louis","family":"Kirsch","sequence":"additional","affiliation":[{"name":"Dalle Molle Institute for Artificial Intelligence Research,Lugano,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bing","family":"Li","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guohao","family":"Li","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuming","family":"Liu","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinjie","family":"Mai","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Piotr","family":"Pi\u0119kos","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aditya A.","family":"Ramesh","sequence":"additional","affiliation":[{"name":"Dalle Molle Institute for Artificial Intelligence Research,Lugano,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Imanol","family":"Schlag","sequence":"additional","affiliation":[{"name":"ETH AI Center,Zurich,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weimin","family":"Shi","sequence":"additional","affiliation":[{"name":"Beihang University,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aleksandar","family":"Stani\u0107","sequence":"additional","affiliation":[{"name":"Dalle Molle Institute for Artificial Intelligence Research,Lugano,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenyi","family":"Wang","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuhui","family":"Wang","sequence":"additional","affiliation":[{"name":"Dalle Molle Institute for Artificial Intelligence Research,Lugano,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mengmeng","family":"Xu","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Deng-Ping","family":"Fan","sequence":"additional","affiliation":[{"name":"CS &#x0026; VCIP, Nankai University,Tianjin,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bernard","family":"Ghanem","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J\u00fcrgen","family":"Schmidhuber","sequence":"additional","affiliation":[{"name":"Center of Excellence for Generative AI, King Abdullah University of Science and Technology,Saudi Arabia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"11138","reference":[{"key":"ref1","volume-title":"Society of Mind","author":"Minsky","year":"1988"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"ref3","first-page":"165","article-title":"A dual back-propagation scheme for scalar reward learning","volume-title":"Proceedings of the 9th Annual Conference of the Cognitive Science Society","author":"Munro","year":"1987"},{"key":"ref4","volume-title":"Supervised learning and systems with excess degrees of freedom","author":"Jordan","year":"1988"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1989.70114"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.1989.118583"},{"key":"ref7","first-page":"836","article-title":"Dynamic reinforcement driven error propagation networks with application to game playing","volume-title":"Proceedings of the Annual Meeting of the Cognitive Science Society","volume":"11","author":"Robinson","year":"1989"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/1888.003.0014"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/72.80202"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1404.7828"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.1990.137723"},{"key":"ref12","first-page":"500","article-title":"Reinforcement learning in Markovian and non-Markovian environments","volume-title":"Proceedings of the 3rd International Conference on Neural Information Processing Systems","author":"Schmidhuber","year":"1990"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1080\/09540090600768658"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TAMD.2010.2056368"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2020.04.008"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.1991.155375"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/S0019-9958(64)90223-2"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1080\/00207166808803030"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/321356.321363"},{"issue":"5","key":"ref21","first-page":"1413","article-title":"On the notion of a random sequence","volume":"14","author":"Levin","year":"1973","journal-title":"Soviet Math. Dokl"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1978.1055913"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-2606-0"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1142\/S0129054102001291"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1023\/B:MACH.0000015880.99707.b2"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(96)00127-X"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.47925\/2007.221"},{"key":"ref28","volume-title":"One big net for everything","author":"Schmidhuber","year":"2018"},{"key":"ref29","volume-title":"Augmented language models: A survey","author":"Mialon","year":"2023"},{"key":"ref30","volume-title":"A survey of large language models","author":"Zhao","year":"2023"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"key":"ref32","first-page":"13","article-title":"Augmenting human intellect: Aconceptual framework","volume-title":"Augmented Education inthe Global Age","author":"Engelbart","year":"2023"},{"key":"ref33","article-title":"NASA Moon Survival Task: The Original Consensus Exercise","author":"Hall","year":"1989","journal-title":"Teleometrics International"},{"key":"ref34","first-page":"235","article-title":"A universal modular actor formalism for artificial intelligence","volume-title":"Proceedings of the 3rd International Joint Conference on Artificial Intelligence","author":"Hewitt","year":"1973"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/800055.802036"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.21236\/ada459166"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.3115\/981863.981871"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/BF00991480"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0029484"},{"key":"ref40","volume-title":"Socratic models: Composing zero-shot multimodal reasoning with language","author":"Zeng","year":"2022"},{"key":"ref41","volume-title":"Visual ChatGPT: Talking, drawing and editing with visual foundation models","author":"Wu","year":"2023"},{"key":"ref42","volume-title":"HuggingGPT: Solving AI tasks with ChatGPT and its friends in hugging face","author":"Shen","year":"2023"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01092"},{"key":"ref44","volume-title":"Toolformer: Language models can teach themselves to use tools","author":"Schick","year":"2023"},{"key":"ref45","volume-title":"ToolLLM:Facilitating large language models to master 16000+ real-world APIs","author":"Qin","year":"2023"},{"key":"ref46","volume-title":"AutoGPT","year":"2023"},{"key":"ref47","volume-title":"LangChain","author":"Chase","year":"2022"},{"key":"ref48","volume-title":"LlamaIndex","author":"Liu","year":"2022"},{"key":"ref49","volume-title":"Xagent: An autonomous agent for complex task solving","year":"2023"},{"key":"ref50","volume-title":"Voyager: An open-ended embodied agent with large language models","author":"Wang","year":"2023"},{"key":"ref51","volume-title":"CAMEL: Communicative agents for \u201cmind\u201d exploration of large language model society","author":"Li","year":"2023"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.810"},{"key":"ref53","volume-title":"MetaGPT: Meta programming for A multi-agent collaborative framework","author":"Hong","year":"2023"},{"key":"ref54","volume-title":"Agents: An open-source framework for autonomous language agents","author":"Zhou","year":"2023"},{"key":"ref55","volume-title":"AutoGen: Enabling next-gen LLM applications via multi-agent conversation","author":"Wu","year":"2023"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606763"},{"key":"ref57","volume-title":"Evaluating language model agency through negotiations","author":"Davidson","year":"2024"},{"key":"ref58","volume-title":"ChatEval: Towards better LLM-based evaluators through multi-agent debate","author":"Chan","year":"2023"},{"key":"ref59","volume-title":"Dynamic llm-agent network: An llm-agent collaboration framework with agent team optimization","author":"Liu","year":"2023"},{"key":"ref60","volume-title":"DSPy: Compiling declarative language model calls into self-improving pipelines","author":"Khattab","year":"2023"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20074-8_9"},{"key":"ref62","first-page":"46595","article-title":"Judging LLM-as-a-judge with MT-bench and chatbot arena","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Zheng","year":"2023"},{"key":"ref63","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"Ouyang","year":"2024"},{"key":"ref64","volume-title":"BLIP-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","author":"Li","year":"2023"},{"key":"ref65","volume-title":"OFA: Unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework","author":"Wang","year":"2022"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.488"},{"key":"ref67","first-page":"13","article-title":"ViLBERT: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks","volume-title":"Proceedings of the 33d International Conference on Neural Information Processing Systems","author":"Lu","year":"2019"},{"key":"ref68","volume-title":"ChatGPT asks, BLIP-2 answers: Automatic questioning towards enriched visual descriptions","author":"Zhu","year":"2023"},{"key":"ref69","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","volume-title":"Proceedings of the 32nd International Conference on International Conference on Machine Learning","volume":"37","author":"Xu","year":"2015"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.81"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"ref72","volume-title":"Hierarchical text-conditional image generation with CLIP latents","author":"Ramesh","year":"2022"},{"key":"ref73","volume-title":"Training a helpful and harmless assistant with reinforcement learning from human feedback","author":"Bai","year":"2022"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1093\/jigpal\/jzp049"},{"key":"ref75","first-page":"439","article-title":"The neural bucket brigade","volume-title":"Connection is m in Perspective","author":"Schmidhuber","year":"1989"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1080\/09540098908915650"},{"key":"ref77","first-page":"1","article-title":"Properties of the bucket brigade","volume-title":"Proceedings of the 1st International Conference on Genetic Algorithms","author":"Holland","year":"1985"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1162\/evco.1994.2.1.1"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007593124513"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.4314839"},{"key":"ref81","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proceedings of the th International Conference on Neural Information Processing Systems","author":"Brown","year":"2020"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1103\/RevModPhys.39.883"},{"key":"ref83","volume-title":"Annotated history of modern AI and deep learning","author":"Schmidhuber","year":"2022"},{"key":"ref84","volume-title":"Collective Choice and Social Welfare","author":"Sen","year":"1971"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.2307\/1914083.JSTOR1914083"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1016\/0022-0531(75)90050-2"},{"key":"ref87","volume-title":"Theory of Games and Economic Behavior","author":"Von Neumann","year":"1947"},{"key":"ref88","volume-title":"Language is not all you need: Aligning perception with language models","author":"Huang","year":"2023"},{"key":"ref89","first-page":"24824","article-title":"Chain of thought prompting elicits reasoning in large language models","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"Wei","year":"2024"},{"key":"ref90","volume-title":"First powdered flight-plane truth","author":"Schmidhuber","year":"2003"},{"key":"ref91","volume-title":"PromptCap: Prompt-guided task-aware image captioning","author":"Hu","year":"2022"},{"key":"ref92","volume-title":"Introducing ChatGPT","year":"2022"},{"key":"ref93","volume-title":"GIT: A generative image-to-text transformer for vision and language","author":"Wang","year":"2022"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.18653\/vl\/N19-142"},{"key":"ref95","volume-title":"ClipCap: CLIP prefix for image captioning","author":"Mokady","year":"2021"},{"key":"ref96","volume-title":"Pythia v0.1: The winning entry to the VQA challenge 2018","author":"Jiang","year":"2018"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1514"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_38"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01389"},{"key":"ref100","volume-title":"Open AI","year":"2023"},{"key":"ref101","author":"Fulford","year":"2023","journal-title":"Chatgpt prompt engineering for developers"},{"key":"ref102","first-page":"22199","article-title":"Large language models are zero-shot reasoners","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"Kojima","year":"2024"},{"key":"ref103","year":"2017","journal-title":"spaCy: Industrial-strength natural language processing"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref105","first-page":"65","article-title":"METEOR: An automatic metric for MT evaluation with improved correlation with human judgments","volume-title":"Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization","author":"Banerjee","year":"2005"},{"key":"ref106","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin","year":"2004","journal-title":"Text Summarization Branches Out"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"ref108","first-page":"957","article-title":"From word embeddings to document distances","volume-title":"Proceedings of the 32nd International Conference on International Conference on Machine Learning","volume":"37","author":"Kusner","year":"2015"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.153"},{"key":"ref110","year":"2023","journal-title":"Imagine 3D model"},{"key":"ref111","volume-title":"Dreamfusion: Text-to-3D using 2D diffusion","author":"Poole","year":"2022"},{"key":"ref112","first-page":"300","article-title":"Mgic3D: High-resolution text-to-3D content creation","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Lin","year":"2023"},{"key":"ref113","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proceedings of the 38th International Conference on Machine Learning","author":"Radford","year":"2021"},{"key":"ref114","volume-title":"Boundary-denoising for video activity localization","author":"Xu","year":"2023"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01842"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.213"},{"key":"ref118","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","volume-title":"Proceedings of the 27th International Conference on Neural Information Processing Systems","volume":"1","author":"Simonyan","year":"2014"},{"key":"ref119","volume-title":"DINO: DETR with improved DeNoising anchor boxes for end-to-end object detection","author":"Zhang","year":"2022"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.563"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.618"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00361"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6984"},{"key":"ref124","volume-title":"Finding moments in video collections using natural language","author":"Escorcia","year":"2019"},{"key":"ref125","first-page":"11846","article-title":"QVHIGHLIGHTS: Detecting moments and highlights in videos via natural language queries","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems","author":"Lei","year":"2024"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01030"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01082"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_36"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093328"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16285"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.155"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_1"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00399"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01017"},{"key":"ref135","volume-title":"ReLERZJU-alibaba submission to the Ego4D natural language queries challenge 2022","author":"Liu","year":"2022"},{"key":"ref136","volume-title":"Exploring anchor-based detection for Ego4D natural language query","author":"Zheng","year":"2022"},{"key":"ref137","volume-title":"A simple transformer-based model for Ego4D natural language queries challenge","author":"Mo","year":"2022"},{"key":"ref138","first-page":"7575","article-title":"Egocentric video-language pretraining","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"Lin","year":"2024"},{"key":"ref139","volume-title":"An efficient COarse-to-fiNE alignment framework Ego4D natural language queries challenge 2022","author":"Hou","year":"2022"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00081"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561916"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2022.104304"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2000.844100"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00008"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3013848"},{"key":"ref147","article-title":"ChatGPT is generating fake news stories - attributed to real journalists","author":"Chiu","year":"2023","journal-title":"I set out to separate fact from fiction"}],"container-title":["Computational Visual Media"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10750449\/10903665\/10903668.pdf?arnumber=10903668","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,11]],"date-time":"2025-03-11T17:34:42Z","timestamp":1741714482000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10903668\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2]]},"references-count":147,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.26599\/cvm.2025.9450460","relation":{},"ISSN":["2096-0662","2096-0433"],"issn-type":[{"value":"2096-0662","type":"electronic"},{"value":"2096-0433","type":"print"}],"subject":[],"published":{"date-parts":[[2025,2]]}}}