{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T07:06:06Z","timestamp":1767078366324,"version":"3.48.0"},"reference-count":20,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:00:00Z","timestamp":1764720000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:00:00Z","timestamp":1764720000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,3]]},"DOI":"10.1109\/dicta68720.2025.11302432","type":"proceedings-article","created":{"date-parts":[[2025,12,29]],"date-time":"2025-12-29T18:36:22Z","timestamp":1767033382000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Plastic-GPT: Multi-Hop Visual Reasoning for Marine Plastic Detection, Polymer Classification, and Recyclability Assessment"],"prefix":"10.1109","author":[{"given":"Anwaar","family":"Ulhaq","sequence":"first","affiliation":[{"name":"Central Queensland University (CQU),Australia"}]},{"given":"Khizer","family":"Ali","sequence":"additional","affiliation":[{"name":"Charles Sturt University (CSU),Australia"}]},{"given":"Jahan","family":"Hassan","sequence":"additional","affiliation":[{"name":"Central Queensland University (CQU),Australia"}]},{"given":"Sajid","family":"Javed","sequence":"additional","affiliation":[{"name":"Khalifa University (KU),UAE"}]},{"given":"Louise","family":"Hardman","sequence":"additional","affiliation":[{"name":"Plastic Collective,Australia"}]}],"member":"263","reference":[{"key":"ref1","first-page":"23716","article-title":"Flamingo: a visual language model for few-shot learning","volume":"35","author":"Alayrac","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-45214-7_6"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1201\/9781003269779-10"},{"key":"ref4","article-title":"M3 cot: A novel benchmark for multi-domain multi-step multimodal chain-of-thought","author":"Chen","year":"2024","journal-title":"arXiv preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27888"},{"key":"ref6","first-page":"2004","volume-title":"Ghost nets in the gulf of carpentaria","author":"Hardesty","year":"2021"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.636"},{"key":"ref8","first-page":"19730","article-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","volume-title":"International conference on machine learning","author":"Li","year":"2023"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.20"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"ref11","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"In International conference on machine learning","author":"Radford","year":"2021"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.marpolbul.2021.112347"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.dib.2022.108072"},{"key":"ref14","article-title":"Qwen2-vl: Enhancing visionlanguage model\u2019s perception of the world at any resolution","author":"Wang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref15","article-title":"Multimodal chain-of-thought reasoning: A comprehensive survey","author":"Wang","year":"2025","journal-title":"arXiv preprint"},{"key":"ref16","article-title":"Llavacot: Let vision language models reason step-by-step","author":"Xu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.1"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3369699"},{"key":"ref19","article-title":"Unsupervised visual chain-of-thought reasoning via preference optimization","author":"Zhao","year":"2025","journal-title":"arXiv preprint"},{"key":"ref20","article-title":"Asymmetry in low-rank adapters of foundation models","author":"Zhu","year":"2024","journal-title":"arXiv preprint"}],"event":{"name":"2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)","start":{"date-parts":[[2025,12,3]]},"location":"Adelaide, Australia","end":{"date-parts":[[2025,12,5]]}},"container-title":["2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11302408\/11302416\/11302432.pdf?arnumber=11302432","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T07:01:38Z","timestamp":1767078098000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11302432\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,3]]},"references-count":20,"URL":"https:\/\/doi.org\/10.1109\/dicta68720.2025.11302432","relation":{},"subject":[],"published":{"date-parts":[[2025,12,3]]}}}