{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T20:24:05Z","timestamp":1772310245536,"version":"3.50.1"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,1,21]],"date-time":"2025-01-21T00:00:00Z","timestamp":1737417600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,21]],"date-time":"2025-01-21T00:00:00Z","timestamp":1737417600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100002241","name":"Japan Science and Technology Agency","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002241","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,1,21]]},"DOI":"10.1109\/sii59315.2025.10871045","type":"proceedings-article","created":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T18:17:07Z","timestamp":1739384227000},"page":"1053-1059","source":"Crossref","is-referenced-by-count":1,"title":["Integrating Multimodal Communication and Comprehension Evaluation during Human-Robot Collaboration for Increased Reliability of Foundation Model-based Task Planning Systems"],"prefix":"10.1109","author":[{"given":"Eden","family":"Martin","sequence":"first","affiliation":[{"name":"Universit&#x00E9; Catholique de Louvain (UCLouvain); 1 Place de l&#x2019;Universit&#x00E9;,Louvain-la-Neuve,Belgium,1348"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shoichi","family":"Hasegawa","sequence":"additional","affiliation":[{"name":"Ritsumeikan University,Kusatsu,Japan,525-8577"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jorge","family":"Solis","sequence":"additional","affiliation":[{"name":"Karlstad University,Karlstad,Sweden,651 88"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Benoit","family":"Macq","sequence":"additional","affiliation":[{"name":"Universit&#x00E9; Catholique de Louvain (UCLouvain); 1 Place de l&#x2019;Universit&#x00E9;,Louvain-la-Neuve,Belgium,1348"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Renaud","family":"Ronsse","sequence":"additional","affiliation":[{"name":"Universit&#x00E9; Catholique de Louvain (UCLouvain); 1 Place de l&#x2019;Universit&#x00E9;,Louvain-la-Neuve,Belgium,1348"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gustavo Alfonso","family":"Garcia Ricardez","sequence":"additional","affiliation":[{"name":"Ritsumeikan University,Kusatsu,Japan,525-8577"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lotfi","family":"El Hafi","sequence":"additional","affiliation":[{"name":"Ritsumeikan University,Kusatsu,Japan,525-8577"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tadahiro","family":"Taniguchi","sequence":"additional","affiliation":[{"name":"Ritsumeikan University,Kusatsu,Japan,525-8577"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"RT-1: Robotics Transformer for Real-World Control at Scale","author":"Brohan","year":"2022"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.1109\/ICMA61710.2024.10633088","article-title":"GameVLM: A Decision-Making Framework for Robotic Task Planning based on Visual Language Models and Zero-Sum Games","author":"Mei","year":"2024"},{"key":"ref3","article-title":"Attention Is All You Need","author":"Vaswani","year":"2017"},{"key":"ref4","article-title":"Training Language Models to Follow Instructions with Human Feedback","author":"Ouyang","year":"2022"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3634966"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.birob.2023.100131"},{"key":"ref7","article-title":"CLIPort: What and Where Pathways for Robotic Manipulation","author":"Shridhar","year":"2021"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341430"},{"key":"ref9","article-title":"Do As I Can, Not As I Say: Grounding Language in Robotic Affordances","author":"Ahn","year":"2022"},{"key":"ref10","article-title":"Language Models as Zero-Shot Planners: Extracting Actionable Knowledge for Embodied Agents","author":"Huang","year":"2022"},{"key":"ref11","article-title":"Interactive Task Planning with Language Models","author":"Li","year":"2023"},{"key":"ref12","article-title":"GPT-4V(ision) for Robotics: Multimodal Task Planning from Human Demonstration","author":"Wake","year":"2023"},{"key":"ref13","article-title":"Methods to Estimate Large Language Model Confidence","author":"Kotelanski","year":"2023"},{"key":"ref14","article-title":"Confidence Matters: Revisiting Intrinsic Self-Correction Capabilities of Large Language Models","author":"Li","year":"2024"},{"key":"ref15","article-title":"Gesture-informed Robot Assistance via Foundation Models","author":"Lin","year":"2023"},{"issue":"3","key":"ref16","first-page":"175","article-title":"Attention, Intentions, and the Structure of Discourse","volume":"12","author":"Grosz","year":"1986","journal-title":"Computational Linguistics"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1378773.1378805"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3310935"},{"key":"ref19","article-title":"Robot Task Planning and Situation Handling in Open Worlds","author":"Ding","year":"2022"},{"issue":"3","key":"ref20","first-page":"52","article-title":"Communication Science: The Role of Communication to Ensure Existence of Human","volume":"7","author":"Purwanto","year":"2018","journal-title":"Asian Journal of Management Sciences & Education"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CogInfoCom.2015.7390590"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2018.09.014"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2004.1389771"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN57019.2023.10309487"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CogInfoCom.2012.6421936"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA55696.2022.00127"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.22260\/ISARC2021\/0067"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3242671.3242675"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_21"},{"key":"ref30","article-title":"Robust Speech Recognition via Large-Scale Weak Supervision","author":"Radford","year":"2022"},{"key":"ref31","first-page":"1","article-title":"MediaPipe: A Framework for Perceiving and Processing Reality","volume-title":"Workshops of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019)","author":"Lugaresi"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2022.2068353"},{"key":"ref33","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52733.2024.01370","article-title":"SpatialVLM: Endowing Vision-Language Models with Spatial Reasoning Capabilities","author":"Chen","year":"2024"}],"event":{"name":"2025 IEEE\/SICE International Symposium on System Integration (SII)","location":"Munich, Germany","start":{"date-parts":[[2025,1,21]]},"end":{"date-parts":[[2025,1,24]]}},"container-title":["2025 IEEE\/SICE International Symposium on System Integration (SII)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10870372\/10870581\/10871045.pdf?arnumber=10871045","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,20]],"date-time":"2025-02-20T19:49:50Z","timestamp":1740080990000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10871045\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,21]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/sii59315.2025.10871045","relation":{},"subject":[],"published":{"date-parts":[[2025,1,21]]}}}