{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T06:00:33Z","timestamp":1780639233290,"version":"3.54.1"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T00:00:00Z","timestamp":1776643200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T00:00:00Z","timestamp":1776643200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,4,20]]},"DOI":"10.23919\/date69613.2026.11539390","type":"proceedings-article","created":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T19:53:10Z","timestamp":1780602790000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["Toward Parallel Serving for Vision-Language Models via Modal Decoupling and Scheduling"],"prefix":"10.23919","author":[{"given":"Yijia","family":"Yang","sequence":"first","affiliation":[{"name":"Capital Normal University,College of Information Engineering,Beijing,China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yubo","family":"Deng","sequence":"additional","affiliation":[{"name":"Capital Normal University,College of Information Engineering,Beijing,China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yida","family":"Wang","sequence":"additional","affiliation":[{"name":"Capital Normal University,College of Information Engineering,Beijing,China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuanchao","family":"Xu","sequence":"additional","affiliation":[{"name":"Capital Normal University,College of Information 
Engineering,Beijing,China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Keni","family":"Qiu","sequence":"additional","affiliation":[{"name":"Capital Normal University,College of Information Engineering,Beijing,China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1017\/S1351324920000601"},{"key":"ref2","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"ref3","article-title":"Roberta: A robustly optimized bert pretraining approach","author":"Liu","year":"2019"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.2139\/ssrn.4783140","article-title":"Exploring the frontier of vision-language models: A survey of current methodologies and future directions","author":"Ghosh","year":"2024"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/d41586-023-00816-5"},{"key":"ref6","article-title":"Gemini: a family of highly capable multimodal models","author":"Team","year":"2023"},{"key":"ref7","article-title":"Microsoft coco captions: Data collection and evaluation server","author":"Chen","year":"2015"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"ref9","volume-title":"Image-text-to-text models","year":"2024"},{"key":"ref10","first-page":"1587","article-title":"A survey of state of the art large vision language models: Benchmark evaluations and challenges","volume-title":"Proceedings of the Computer Vision and Pattern Recognition Conference","author":"Li"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/BigData59044.2023.10386743"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref13","article-title":"Qwen2-vl: Enhancing vision-language model\u2019s perception of the world at any resolution","author":"Wang","year":"2024"},{"key":"ref14","article-title":"An image is worth 16x16 
words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref16","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref17","first-page":"521","article-title":"Orca: A distributed serving system for Transformer-Based generative models","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Yu"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2000"},{"key":"ref19","article-title":"Metron: Holistic performance evaluation framework for llm inference systems","author":"Agrawal","year":"2024"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS66672.2025.00038"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01841"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.00339"},{"key":"ref23","volume-title":"Nvidia mps","year":"2022"},{"key":"ref24","volume-title":"Inter-process communication"},{"key":"ref25","volume-title":"Sharegpt"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref27","author":"Qiu","year":"2025","journal-title":"Modserve: Scalable and resource-efficient large multimodal model serving"},{"key":"ref28","author":"Wu","year":"2024","journal-title":"Fast distributed inference serving for large language models"},{"key":"ref29","article-title":"Fastswitch: Optimizing context switching efficiency in fairness-aware large language model serving","author":"Shen","year":"2024"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3721146.3721956"}],"event":{"name":"2026 Design, Automation & Test in Europe Conference (DATE)","location":"Verona, 
Italy","start":{"date-parts":[[2026,4,20]]},"end":{"date-parts":[[2026,4,22]]}},"container-title":["2026 Design, Automation & Test in Europe Conference (DATE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11539023\/11539024\/11539390.pdf?arnumber=11539390","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T05:07:39Z","timestamp":1780636059000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11539390\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,20]]},"references-count":30,"URL":"https:\/\/doi.org\/10.23919\/date69613.2026.11539390","relation":{},"subject":[],"published":{"date-parts":[[2026,4,20]]}}}