{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T08:52:34Z","timestamp":1773391954797,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,11]],"date-time":"2024-03-11T00:00:00Z","timestamp":1710115200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,11]]},"DOI":"10.1145\/3610978.3641080","type":"proceedings-article","created":{"date-parts":[[2024,3,10]],"date-time":"2024-03-10T22:55:43Z","timestamp":1710111343000},"page":"712-716","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":17,"title":["CognitiveDog: Large Multimodal Model Based System to Translate Vision and Language into Action of Quadruped Robot"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6119-2366","authenticated-orcid":false,"given":"Artem","family":"Lykov","sequence":"first","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0423-0499","authenticated-orcid":false,"given":"Mikhail","family":"Litvinov","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5979-487X","authenticated-orcid":false,"given":"Mikhail","family":"Konenkov","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0816-7442","authenticated-orcid":false,"given":"Rinat","family":"Prochii","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7297-6901","authenticated-orcid":false,"given":"Nikita","family":"Burtsev","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0613-5359","authenticated-orcid":false,"given":"Ali Alridha","family":"Abdulkarim","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2228-7298","authenticated-orcid":false,"given":"Artem","family":"Bazhenov","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4530-3518","authenticated-orcid":false,"given":"Vladimir","family":"Berman","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8055-5345","authenticated-orcid":false,"given":"Dzmitry","family":"Tsetserukou","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]}],"member":"320","published-online":{"date-parts":[[2024,3,11]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Michael Ahn Anthony Brohan Noah Brown Yevgen Chebotar Omar Cortes Byron David Chelsea Finn Chuyuan Fu Keerthana Gopalakrishnan Karol Hausman et al. 2022. Do as i can not as i say: Grounding language in robotic affordances. arXiv preprint arXiv:2204.01691 (2022)."},{"key":"e_1_3_2_2_2_1","unstructured":"Ebtesam Almazrouei Hamza Alobeidli Abdulaziz Alshamsi Alessandro Cappelli Ruxandra Cojocaru M\u00e9rouane Debbah \u00c9tienne Goffinet Daniel Hesslow Julien Launay Quentin Malartic Daniele Mazzotta Badreddine Noune Baptiste Pannier and Guilherme Penedo. 2023. The Falcon Series of Open Language Models. arxiv: 2311.16867 [cs.CL]"},{"key":"e_1_3_2_2_3_1","unstructured":"Anthony Brohan Noah Brown Justice Carbajal Yevgen Chebotar Xi Chen Krzysztof Choromanski Tianli Ding Danny Driess Avinava Dubey Chelsea Finn et al. 2023. Rt-2: Vision-language-action models transfer web knowledge to robotic control. arXiv preprint arXiv:2307.15818 (2023)."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Anthony Brohan Noah Brown Justice Carbajal Yevgen Chebotar Joseph Dabis Chelsea Finn Keerthana Gopalakrishnan Karol Hausman Alex Herzog Jasmine Hsu et al. 2022. Rt-1: Robotics transformer for real-world control at scale. arXiv preprint arXiv:2212.06817 (2022).","DOI":"10.15607\/RSS.2023.XIX.025"},{"key":"e_1_3_2_2_5_1","volume-title":"Minigpt-v2: large language model as a unified interface for vision-language multi-task learning. arXiv preprint arXiv:2310.09478","author":"Chen Jun","year":"2023","unstructured":"Jun Chen, Deyao Zhu, Xiaoqian Shen, Xiang Li, Zechun Liu, Pengchuan Zhang, Raghuraman Krishnamoorthi, Vikas Chandra, Yunyang Xiong, and Mohamed Elhoseiny. 2023. Minigpt-v2: large language model as a unified interface for vision-language multi-task learning. arXiv preprint arXiv:2310.09478 (2023)."},{"key":"e_1_3_2_2_6_1","volume-title":"Corey Lynch, Aakanksha Chowdhery, Brian Ichter, Ayzaan Wahid, Jonathan Tompson, Quan Vuong, Tianhe Yu, et al.","author":"Driess Danny","year":"2023","unstructured":"Danny Driess, Fei Xia, Mehdi SM Sajjadi, Corey Lynch, Aakanksha Chowdhery, Brian Ichter, Ayzaan Wahid, Jonathan Tompson, Quan Vuong, Tianhe Yu, et al. 2023. Palm-e: An embodied multimodal language model. arXiv preprint arXiv:2303.03378 (2023)."},{"key":"e_1_3_2_2_7_1","unstructured":"Boston Dynamics. 2023. Robots That Can Chat. https:\/\/bostondynamics.com\/blog\/robots-that-can-chat."},{"key":"e_1_3_2_2_8_1","unstructured":"G. Brockman et al. 2022. Introducing ChatGPT. https:\/\/openai.com\/blog\/chatgpt."},{"key":"e_1_3_2_2_9_1","volume-title":"Zhen Wang, Daisy Zhe Wang, and Zhiting Hu.","author":"Hao Shibo","year":"2023","unstructured":"Shibo Hao, Yi Gu, Haodi Ma, Joshua Jiahua Hong, Zhen Wang, Daisy Zhe Wang, and Zhiting Hu. 2023. Reasoning with Language Model is Planning with World Model. arxiv: 2305.14992 [cs.CL]"},{"key":"e_1_3_2_2_10_1","unstructured":"Wenlong Huang Fei Xia Ted Xiao Harris Chan Jacky Liang Pete Florence Andy Zeng Jonathan Tompson Igor Mordatch Yevgen Chebotar et al. 2022. Inner monologue: Embodied reasoning through planning with language models. arXiv preprint arXiv:2207.05608 (2022)."},{"key":"e_1_3_2_2_11_1","volume-title":"Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al.","author":"Jiang Albert Q","year":"2023","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_2_12_1","unstructured":"Yao Mu Qinglong Zhang Mengkang Hu Wenhai Wang Mingyu Ding Jun Jin Bin Wang Jifeng Dai Yu Qiao and Ping Luo. 2023. EmbodiedGPT: Vision-Language Pre-Training via Embodied Chain of Thought. arxiv: 2305.15021 [cs.RO]"},{"key":"e_1_3_2_2_13_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arxiv: 2303.08774 [cs.CL]"},{"key":"e_1_3_2_2_14_1","unstructured":"Vilius Petkauskas. 2023. ChatGPT injected into Boston Dynamics' Spot. https:\/\/cybernews.com\/tech\/chatgpt-google-boston-dynamics-spot\/."},{"key":"e_1_3_2_2_15_1","volume-title":"Introducing Gemini: Google's most capable AI model yet. https:\/\/blog.google\/technology\/ai\/google-gemini-ai\/#sundar-note [Online","author":"Pichai Sundar","year":"2023","unstructured":"Sundar Pichai and Demis Hassabis. 2023. Introducing Gemini: Google's most capable AI model yet. https:\/\/blog.google\/technology\/ai\/google-gemini-ai\/#sundar-note [Online; accessed 8-December-2023]."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161317"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.54097\/hset.v39i.6767"},{"key":"e_1_3_2_2_18_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_2_19_1","unstructured":"Chen Wei-Ming Wang Wei-Chen Han Song and Jimmy Shong. 2023. TinyChatEngine: On-Device LLM Inference Library. https:\/\/github.com\/mit-han-lab\/TinyChatEngine."},{"key":"e_1_3_2_2_20_1","volume-title":"Autogen: Enabling next-gen llm applications via multi-agent conversation framework. arXiv preprint arXiv:2308.08155","author":"Wu Qingyun","year":"2023","unstructured":"Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Shaokun Zhang, Erkang Zhu, Beibin Li, Li Jiang, Xiaoyun Zhang, and Chi Wang. 2023. Autogen: Enabling next-gen llm applications via multi-agent conversation framework. arXiv preprint arXiv:2308.08155 (2023)."},{"key":"e_1_3_2_2_21_1","volume-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)."}],"event":{"name":"HRI '24: ACM\/IEEE International Conference on Human-Robot Interaction","location":"Boulder CO USA","acronym":"HRI '24","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Companion of the 2024 ACM\/IEEE International Conference on Human-Robot Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610978.3641080","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3610978.3641080","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:28:09Z","timestamp":1755826089000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610978.3641080"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,11]]},"references-count":21,"alternative-id":["10.1145\/3610978.3641080","10.1145\/3610978"],"URL":"https:\/\/doi.org\/10.1145\/3610978.3641080","relation":{},"subject":[],"published":{"date-parts":[[2024,3,11]]},"assertion":[{"value":"2024-03-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}