{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T01:01:09Z","timestamp":1730250069486,"version":"3.28.0"},"reference-count":21,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100018928","name":"Westlake University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100018928","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,7,15]]},"DOI":"10.1109\/icme57554.2024.10688120","type":"proceedings-article","created":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T17:24:16Z","timestamp":1727717056000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Context-Aware Text-Assisted Multimodal Framework for Cervical Cytology Cell Diagnosis and Chatting"],"prefix":"10.1109","author":[{"given":"Yuxuan","family":"Sun","sequence":"first","affiliation":[{"name":"Zhejiang University,College of Computer Science and Technology,China"}]},{"given":"Chenglu","family":"Zhu","sequence":"additional","affiliation":[{"name":"Westlake University,Research Center for Industries of the Future and School of Engineering,China"}]},{"given":"Sunyi","family":"Zheng","sequence":"additional","affiliation":[{"name":"Westlake University,Research Center for Industries of the Future and School of Engineering,China"}]},{"given":"Yunlong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang University,College of Computer Science and Technology,China"}]},{"given":"Honglin","family":"Li","sequence":"additional","affiliation":[{"name":"Zhejiang University,College of Computer Science and Technology,China"}]},{"given":"Lin","family":"Yang","sequence":"additional","affiliation":[{"name":"Westlake University,Research Center for Industries of the Future and School of Engineering,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1309\/AJCPTGD94EVRSJCG"},{"article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","year":"2020","author":"Dosovitskiy","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2019.09.015"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3060447"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1514"},{"article-title":"Visualbert: A simple and performant baseline for vision and language","year":"2019","author":"Li","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-019-0052-1"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87199-4_59"},{"year":"2022","key":"ref11","article-title":"Introducing chatgpt"},{"key":"ref12","article-title":"Gpt-4 technical report","volume":"2303","author":"OpenAI","year":"2023","journal-title":"ArXiv"},{"article-title":"Llama: Open and efficient foundation language models","year":"2023","author":"Touvron","key":"ref13"},{"article-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality","year":"2023","author":"Chiang","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06291-2"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1038\/s44172-024-00271-8"},{"article-title":"Chatdoctor: A medical chat model fine-tuned on llama model using medical domain knowledge","year":"2023","author":"Yunxiang","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1111\/j.1365-2303.2007.00469.x"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"article-title":"Roberta: A robustly optimized bert pretraining approach","year":"2019","author":"Liu","key":"ref20"},{"article-title":"Judging llm-as-a-judge with mt-bench and chatbot arena","year":"2023","author":"Zheng","key":"ref21"}],"event":{"name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2024,7,15]]},"location":"Niagara Falls, ON, Canada","end":{"date-parts":[[2024,7,19]]}},"container-title":["2024 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10685847\/10687354\/10688120.pdf?arnumber=10688120","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T06:23:31Z","timestamp":1727763811000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10688120\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,15]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/icme57554.2024.10688120","relation":{},"subject":[],"published":{"date-parts":[[2024,7,15]]}}}