{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:40:46Z","timestamp":1766061646831,"version":"3.48.0"},"reference-count":54,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004147","name":"Tsinghua University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004147","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11246718","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"20960-20967","source":"Crossref","is-referenced-by-count":0,"title":["BookBot: A Robotic Manipulation Benchmark for Voice-Driven Book Recognition and Grasping in Cluttered Environments"],"prefix":"10.1109","author":[{"given":"Huaqiang","family":"Wang","sequence":"first","affiliation":[{"name":"Tsinghua University,Department of Electronic Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Electronic Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiang","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Electronic 
Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yali","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Electronic Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shengjin","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Electronic Engineering"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801682"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802284"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802439"},{"article-title":"Pp-ocr: A practical ultra lightweight ocr system","year":"2020","author":"Du","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342260"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00883"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341635"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341160"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00084"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/iros45743.2020.9341164"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1002\/wics.101"},{"article-title":"Qwen2 technical 
report","year":"2024","author":"Yang","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2646371"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01472"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00983"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00035"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3068335"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611477"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2025.xxi.152"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.120"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341379"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197318"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561877"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636012"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2023.3281153"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811961"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611277"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2020.00051"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01146"},{"article-title":"Llama: Open and efficient foundation language models","year":"2023","author":"Touvron","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/n19-1423"},{"article-title":"Improving language understanding by generative pre-training","year":"2018","author":"Radford","key":"ref32"},{"issue":"8","key":"ref33","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI 
blog"},{"key":"ref34","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"article-title":"Gpt-4 technical report","year":"2023","author":"Achiam","key":"ref35"},{"article-title":"Gemini: a family of highly capable multimodal models","year":"2023","author":"Team","key":"ref36"},{"key":"ref37","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"article-title":"Phi-4 technical report","year":"2024","author":"Abdin","key":"ref38"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.264"},{"article-title":"Rekep: Spatio-temporal reasoning of relational keypoint constraints for robotic manipulation","year":"2024","author":"Huang","key":"ref40"},{"article-title":"Dinov2: Learning robust visual features without supervision","year":"2023","author":"Oquab","key":"ref41"},{"key":"ref42","first-page":"287","article-title":"Do as i can, not as i say: Grounding language in robotic affordances","volume-title":"Conference on Robot Learning (CoRL)","author":"Brohan"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801352"},{"article-title":"Look before you leap: Unveiling the power of gpt-4v in robotic vision-language 
planning","year":"2023","author":"Hu","key":"ref44"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/LSENS.2021.3091640"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TNSRE.2020.3007532"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0090-8"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1405.0312"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"article-title":"Detectron2","year":"2019","author":"Wu","key":"ref53"},{"article-title":"Accurate, large minibatch sgd: Training imagenet in 1 hour","year":"2017","author":"Goyal","key":"ref54"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2025,10,19]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11246718.pdf?arnumber=11246718","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:36:57Z","timestamp":1766061417000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11246718\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11246718","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}