{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T07:22:20Z","timestamp":1778052140351,"version":"3.51.4"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T00:00:00Z","timestamp":1772755200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T00:00:00Z","timestamp":1772755200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,3,6]]},"DOI":"10.1109\/wacv61042.2026.00822","type":"proceedings-article","created":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T19:59:32Z","timestamp":1778011172000},"page":"8521-8530","source":"Crossref","is-referenced-by-count":0,"title":["Chain-of-Look Spatial Reasoning for Dense Surgical Instrument Counting"],"prefix":"10.1109","author":[{"given":"Rishikesh","family":"Bhyri","sequence":"first","affiliation":[{"name":"State University of New York at Buffalo"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Brian R","family":"Quaranto","sequence":"additional","affiliation":[{"name":"State University of New York at Buffalo"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junsong","family":"Yuan","sequence":"additional","affiliation":[{"name":"State University of New York at Buffalo"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter C W","family":"Kim","sequence":"additional","affiliation":[{"name":"State University of New York at Buffalo"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nan","family":"Xi","sequence":"additional","affiliation":[{"name":"State University of New York at Buffalo"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Open-world text-specified object counting","author":"Amini-Naieni","year":"2023"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1547"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10578-9_33"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46478-7_30"},{"key":"ref5","article-title":"Qwen2. 5-vl technical report","author":"Bai","year":"2025"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/3477.775269"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01607"},{"key":"ref8","first-page":"4171","article-title":"Bert: Pre-training of deep bidirectional trans-formers for language understanding","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers)","author":"Devlin"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.01730"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.4159\/9780674037533"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02681"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73116-7_17"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.28050"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2006.197"},{"key":"ref17","first-page":"1324","article-title":"Learning to count objects in images","volume-title":"Proceedings of the 24th International Conference on Neural Information Processing Systems - Volume 1","author":"Lempitsky"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2018.2858826"},{"key":"ref19","article-title":"Countr: Transformer-based generalised visual counting","author":"Liu","year":"2023"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1167\/8.6.115"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1049\/ic:19970387"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02198"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02647"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.01217"},{"key":"ref26","article-title":"Roboflow: Computer vision platform","year":"2025"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01824"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1800"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611898"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00286"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1080\/21681163.2016.1149104"},{"key":"ref32","article-title":"Sep: Self-enhanced prompt tuning for visual-language model","author":"Yao","year":"2024"}],"event":{"name":"2026 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)","location":"Tucson, AZ, USA","start":{"date-parts":[[2026,3,6]]},"end":{"date-parts":[[2026,3,10]]}},"container-title":["2026 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11491838\/11491925\/11492231.pdf?arnumber=11492231","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T06:41:46Z","timestamp":1778049706000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11492231\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,6]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/wacv61042.2026.00822","relation":{},"subject":[],"published":{"date-parts":[[2026,3,6]]}}}