{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T18:41:19Z","timestamp":1762368079516,"version":"build-2065373602"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,8,31]],"date-time":"2025-08-31T00:00:00Z","timestamp":1756598400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,31]],"date-time":"2025-08-31T00:00:00Z","timestamp":1756598400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council (NSERC) of Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000196","name":"Canada Foundation for Innovation (CFI)","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000196","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,8,31]]},"DOI":"10.1109\/mlsp62443.2025.11204323","type":"proceedings-article","created":{"date-parts":[[2025,10,24]],"date-time":"2025-10-24T17:15:52Z","timestamp":1761326152000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Learning or Cheating? 
Assessing Data Contamination in Large Vision-Language Models"],"prefix":"10.1109","author":[{"given":"Ahmed","family":"Masry","sequence":"first","affiliation":[{"name":"York University,Department of Electrical Engineering and Computer Science,Toronto,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mahir","family":"Ahmed","sequence":"additional","affiliation":[{"name":"York University,Department of Electrical Engineering and Computer Science,Toronto,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ridwan","family":"Mahbub","sequence":"additional","affiliation":[{"name":"York University,Department of Electrical Engineering and Computer Science,Toronto,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Qwen2vl: Enhancing vision-language model\u2019s perception of the world at any resolution","author":"Wang","year":"2024","journal-title":"arXiv preprint arXiv"},{"volume-title":"Expanding performance boundaries of open-source multimodal models with model, data, and test-time scaling","year":"2025","author":"Chen","key":"ref2"},{"volume-title":"Phi-3 technical report: A highly capable language model locally on your phone","year":"2024","author":"Abdin","key":"ref3"},{"key":"ref4","article-title":"Llavaonevision: Easy visual task transfer","author":"Li","year":"2024","journal-title":"arXiv preprint arXiv"},{"journal-title":"OpenAI","article-title":"Gpt-4 technical report","year":"2024","key":"ref5"},{"journal-title":"Gemini Team","article-title":"Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context","year":"2024","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.177"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00225"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/wacv51458.2022.00264"},{"volume-title":"Mathvista: Evaluating 
mathematical reasoning of foundation models in visual contexts","year":"2024","author":"Lu","key":"ref10"},{"journal-title":"Anthropic","article-title":"Introducing the next generation of claude","year":"2024","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.18"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.716"},{"volume-title":"Both text and images leaked! a systematic analysis of multimodal LLM data contamination","year":"2025","author":"Song","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.277"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.811"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00244"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICDARW.2019.10029"},{"volume-title":"Cord: A consolidated receipt dataset for post-ocr parsing","author":"Park","key":"ref19"},{"volume-title":"Language models are few-shot learners","year":"2020","author":"Brown","key":"ref20"},{"journal-title":"Llama 3 Team","article-title":"The llama 3 herd of models","year":"2024","key":"ref21"},{"volume-title":"Ovis: Structural embedding alignment for multimodal large language model","year":"2024","author":"Lu","key":"ref22"},{"journal-title":"SmolVLM","article-title":"Smolvlm - small yet mighty vision language model","year":"2024","key":"ref23"},{"volume-title":"Deepseek-vl2: Mixture-of-experts vision-language models for advanced multimodal understanding","year":"2024","author":"Wu","key":"ref24"},{"journal-title":"Spacy","article-title":"Spacy","year":"2024","key":"ref25"},{"journal-title":"The pandas development team","article-title":"pandas-dev\/pandas: Pandas","year":"2020","key":"ref26"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3685520"}],"event":{"name":"2025 IEEE 35th International Workshop on Machine Learning for Signal Processing 
(MLSP)","start":{"date-parts":[[2025,8,31]]},"location":"Istanbul, Turkiye","end":{"date-parts":[[2025,9,3]]}},"container-title":["2025 IEEE 35th International Workshop on Machine Learning for Signal Processing (MLSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11204201\/11204202\/11204323.pdf?arnumber=11204323","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T18:37:07Z","timestamp":1762367827000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11204323\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,31]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/mlsp62443.2025.11204323","relation":{},"subject":[],"published":{"date-parts":[[2025,8,31]]}}}