{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T13:24:15Z","timestamp":1752672255771},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,10,1]]},"DOI":"10.1109\/smc53992.2023.10393952","type":"proceedings-article","created":{"date-parts":[[2024,1,29]],"date-time":"2024-01-29T18:32:04Z","timestamp":1706553124000},"page":"427-432","source":"Crossref","is-referenced-by-count":1,"title":["Object Detection via Multi-Scale Token Based on Vision Transformer"],"prefix":"10.1109","author":[{"given":"Yu","family":"Xiao","sequence":"first","affiliation":[{"name":"College of Computer Science, Chongqing University,Chongqing,China"}]},{"given":"Tao","family":"Qiu","sequence":"additional","affiliation":[{"name":"College of Computer Science, Chongqing University,Chongqing,China"}]},{"given":"Xinqi","family":"Jiang","sequence":"additional","affiliation":[{"name":"College of Computer Science, Chongqing University,Chongqing,China"}]},{"given":"Qi","family":"Yang","sequence":"additional","affiliation":[{"name":"College of Computer Science, Chongqing University,Chongqing,China"}]},{"given":"Zhaowei","family":"Shang","sequence":"additional","affiliation":[{"name":"College of Computer Science, Chongqing University,Chongqing,China"}]},{"given":"Taiping","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Computer Science, Chongqing University,Chongqing,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/iccv48922.2021.00061"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr46437.2021.00681"},{"journal-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","year":"2020","author":"Alexey","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/iros55552.2023.10342025"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10578-9_23"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00033"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"ref12","article-title":"Efficientnet: Rethinking model scaling for convolutional neural networks","author":"Tan","year":"2019","journal-title":"PMLR"},{"key":"ref13","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9534225"},{"journal-title":"Yolov4: Optimal speed and accuracy of object detection","year":"2020","author":"Alexey","key":"ref16"},{"journal-title":"FSSD: feature fusion single shot multibox detector","year":"2017","author":"Zuoxin","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2022.3164083"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"ref20","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","author":"Xie","year":"2021","journal-title":"NIPS"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00062"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"journal-title":"CoAtNet: Marrying convolution and attention for all data sizes","year":"2021","author":"Dai","key":"ref25"}],"event":{"name":"2023 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","start":{"date-parts":[[2023,10,1]]},"location":"Honolulu, Oahu, HI, USA","end":{"date-parts":[[2023,10,4]]}},"container-title":["2023 IEEE International Conference on Systems, Man, and Cybernetics (SMC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10391856\/10393862\/10393952.pdf?arnumber=10393952","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,3]],"date-time":"2024-03-03T06:52:19Z","timestamp":1709448739000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10393952\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,1]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/smc53992.2023.10393952","relation":{},"subject":[],"published":{"date-parts":[[2023,10,1]]}}}