{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T04:07:59Z","timestamp":1779422879362,"version":"3.53.1"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T00:00:00Z","timestamp":1779753600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,26]]},"DOI":"10.1145\/3786335.3813213","type":"proceedings-article","created":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T03:16:22Z","timestamp":1779419782000},"page":"1193-1198","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Genflow Ad Studio: A Compound AI Architecture for Brand-Aligned, Self-Correcting Video Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-0233-4623","authenticated-orcid":false,"given":"Debanshu","family":"Das","sequence":"first","affiliation":[{"name":"Google, San Francisco, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6107-574X","authenticated-orcid":false,"given":"Lavi","family":"Nigam","sequence":"additional","affiliation":[{"name":"Google, Mountain View, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9917-1353","authenticated-orcid":false,"given":"Sunil Kumar Jang","family":"Bahadur","sequence":"additional","affiliation":[{"name":"Google, Mumbai, Maharashtra, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4588-0092","authenticated-orcid":false,"given":"Gopala","family":"Dhar","sequence":"additional","affiliation":[{"name":"Google, Mumbai, Maharashtra, India"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,5,26]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Tim Brooks Bill Peebles Connor Holmes et al. 2024. Video generation models as world simulators. OpenAI Blog. Retrieved April 28 2026 from https:\/\/openai.com\/research\/video-generation-models-as-world-simulators"},{"key":"e_1_3_3_2_3_2","unstructured":"Veo Team. 2025. Veo 3 Technical Report. Google DeepMind. Retrieved April 28 2026 from https:\/\/storage.googleapis.com\/deepmind-media\/veo\/Veo-3-Tech-Report.pdf"},{"key":"e_1_3_3_2_4_2","unstructured":"Matei Zaharia Omar Khattab Lingjiao Chen et al. 2024. The Shift from Models to Compound AI Systems. Berkeley AI Research Blog. Retrieved April 28 2026 from https:\/\/bair.berkeley.edu\/blog\/2024\/02\/18\/compound-ai-systems\/"},{"key":"e_1_3_3_2_5_2","unstructured":"Andreas Blattmann Tim Dockhorn Sumith Kulal et al. 2023. Stable Video Diffusion: Scaling Latent Video Diffusion Models to Large Datasets. arXiv:https:\/\/arXiv.org\/abs\/2311.15127 [cs.CV]."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"Ziwei Ji Nayeon Lee Rita Frieske et al. 2023. Survey of Hallucination in Natural Language Generation. ACM Computing Surveys 55 12 (2023) 1\u201338.","DOI":"10.1145\/3571730"},{"key":"e_1_3_3_2_7_2","unstructured":"Yilun Du Shuang Li Antonio Torralba et al. 2024. Improving Factuality and Reasoning in Language Models through Multiagent Debate. Proceedings of the 41st International Conference on Machine Learning (PMLR 235) 11733\u201311763."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Tian Liang Zhiwei He Wenxiang Jiao et al. 2024. Encouraging Divergent Thinking in Large Language Models through Multi-Agent Debate. Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (EMNLP) 17889\u201317904.","DOI":"10.18653\/v1\/2024.emnlp-main.992"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Noah Shinn Federico Cassano Ashwin Gopinath et al. 2023. Reflexion: Language Agents with Verbal Reinforcement Learning. Advances in Neural Information Processing Systems 36 (2023) 8634\u20138652.","DOI":"10.52202\/075280-0377"},{"key":"e_1_3_3_2_10_2","unstructured":"Qingyun Wu Gagan Bansal Jieyu Zhang et al. 2024. AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversations. Proceedings of the First Conference on Language Modeling (COLM). Retrieved April 28 2026 from https:\/\/openreview.net\/forum?id=BAakY1hNKS"},{"key":"e_1_3_3_2_11_2","unstructured":"Omar Khattab Arnav Singhvi Paridhi Maheshwari et al. 2024. DSPy: Compiling Declarative Language Model Calls into State-of-the-Art Pipelines. Proceedings of the Twelfth International Conference on Learning Representations (ICLR). Retrieved April 28 2026 from https:\/\/openreview.net\/forum?id=sY5N0zY5Od"},{"key":"e_1_3_3_2_12_2","unstructured":"Harrison Chase. 2022. LangChain. GitHub. Retrieved April 28 2026 from https:\/\/github.com\/langchain-ai\/langchain"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"Haolun Wu Ye Yuan Liana Mikaelyan et al. 2024. Learning to Extract Structured Entities Using Language Models. Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (EMNLP). 6817\u20136834.","DOI":"10.18653\/v1\/2024.emnlp-main.388"},{"key":"e_1_3_3_2_14_2","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol et al. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. arXiv:https:\/\/arXiv.org\/abs\/2204.06125 [cs.CV]."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Andreas Blattmann Robin Rombach Huan Ling et al. 2023. Align Your Latents: High-Resolution Video Synthesis with Latent Diffusion Models. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 22563\u201322575.","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"e_1_3_3_2_16_2","unstructured":"Gemini Team Machel Reid Nikolay Savinov et al. 2024. Gemini 1.5: Unlocking Multimodal Understanding Across Millions of Tokens of Context. arXiv:https:\/\/arXiv.org\/abs\/2403.05530 [cs.CL]."},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Haotian Liu Chunyuan Li Qingyang Wu et al. 2023. Visual Instruction Tuning. Advances in Neural Information Processing Systems 36 (2023) 34892\u201334916.","DOI":"10.52202\/075280-1516"},{"key":"e_1_3_3_2_18_2","unstructured":"Pydantic Core Team. 2025. Pydantic: Data validation using Python type hints. Software Documentation. Retrieved April 28 2026 from https:\/\/pydantic.dev\/"},{"key":"e_1_3_3_2_19_2","unstructured":"Martin Heusel Hubert Ramsauer Thomas Unterthiner et al. 2017. GANs trained by a two time-scale update rule converge to a local Nash equilibrium. Advances in Neural Information Processing Systems 30 (2017)."},{"key":"e_1_3_3_2_20_2","unstructured":"Thomas Unterthiner Sjoerd van Steenkiste Karol Kurach et al. 2018. Towards accurate generative models of video: A new metric & challenges. arXiv:https:\/\/arXiv.org\/abs\/1812.01717."},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"crossref","unstructured":"Lianmin Zheng Wei-Lin Chiang Ying Sheng et al. 2023. Judging LLM-as-a-judge with MT-Bench and Chatbot Arena. Advances in Neural Information Processing Systems 36 (2023) 46595\u201346623.","DOI":"10.52202\/075280-2020"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans et al. 2022. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. Advances in Neural Information Processing Systems 35 (2022) 24824\u201324837.","DOI":"10.52202\/068431-1800"},{"key":"e_1_3_3_2_23_2","unstructured":"Shunyu Yao Jeffrey Zhao Dian Yu et al. 2023. ReAct: Synergizing Reasoning and Acting in Language Models. International Conference on Learning Representations (ICLR). Retrieved April 28 2026 from https:\/\/openreview.net\/forum?id=WE_vluYUL-X"},{"key":"e_1_3_3_2_24_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal et al. 2023. GPT-4 Technical Report. arXiv:https:\/\/arXiv.org\/abs\/2303.08774."},{"key":"e_1_3_3_2_25_2","unstructured":"Hugo Touvron Louis Martin Kevin Stone et al. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv:https:\/\/arXiv.org\/abs\/2307.09288."},{"key":"e_1_3_3_2_26_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar et al. 2017. Attention Is All You Need. Advances in Neural Information Processing Systems 30 (2017)."}],"event":{"name":"CAIS '26: ACM Conference on AI and Agentic Systems","location":"San Jose CA USA","acronym":"CAIS '26"},"container-title":["Proceedings of the ACM Conference on AI and Agentic Systems"],"original-title":[],"deposited":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T03:30:01Z","timestamp":1779420601000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3786335.3813213"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,26]]},"references-count":25,"alternative-id":["10.1145\/3786335.3813213","10.1145\/3786335"],"URL":"https:\/\/doi.org\/10.1145\/3786335.3813213","relation":{},"subject":[],"published":{"date-parts":[[2026,5,26]]},"assertion":[{"value":"2026-05-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}