{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:07:49Z","timestamp":1755907669477,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,24]],"date-time":"2024-05-24T00:00:00Z","timestamp":1716508800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,24]]},"DOI":"10.1145\/3670105.3670147","type":"proceedings-article","created":{"date-parts":[[2024,7,29]],"date-time":"2024-07-29T18:29:36Z","timestamp":1722277776000},"page":"251-255","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Zero-shot Behavior Detection Based on Multimodal Large Language Model Expansion."],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6901-2209","authenticated-orcid":false,"given":"Guangxin","family":"Wang","sequence":"first","affiliation":[{"name":"Inspur Academy of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3858-317X","authenticated-orcid":false,"given":"Xue","family":"Li","sequence":"additional","affiliation":[{"name":"Inspur Academy of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7084-9427","authenticated-orcid":false,"given":"Dongshen","family":"Guo","sequence":"additional","affiliation":[{"name":"Inspur Academy of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5439-7298","authenticated-orcid":false,"given":"Qiang","family":"Duan","sequence":"additional","affiliation":[{"name":"Inspur Academy of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7530-3326","authenticated-orcid":false,"given":"Qibin","family":"Chen","sequence":"additional","affiliation":[{"name":"Inspur Academy of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0592-590X","authenticated-orcid":false,"given":"Kai","family":"Jiang","sequence":"additional","affiliation":[{"name":"Inspur Academy of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9992-4067","authenticated-orcid":false,"given":"Rui","family":"Li","sequence":"additional","affiliation":[{"name":"Inspur Academy of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3329-6349","authenticated-orcid":false,"given":"Zizhong","family":"Wei","sequence":"additional","affiliation":[{"name":"Inspur Academy of Science and Technology, China"}]}],"member":"320","published-online":{"date-parts":[[2024,7,29]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.910878"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.143"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3390\/electronics11121912"},{"key":"e_1_3_2_1_4_1","volume-title":"Design of large data based real-time perception information acquisition system for mining personnel safety. Modern Electronics Technique","author":"Chunhe I.","year":"2019","unstructured":"L.\u00a0I. Chunhe, Tao Shuai, and Technology. 2019. Design of large data based real-time perception information acquisition system for mining personnel safety. Modern Electronics Technique (2019)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/VSPETS.2005.1570899"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.256"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00193"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.6234"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11760-008-0099-7"},{"key":"e_1_3_2_1_10_1","volume-title":"International conference on machine learning. PMLR, 12888\u201312900","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International conference on machine learning. PMLR, 12888\u201312900."},{"key":"e_1_3_2_1_11_1","volume-title":"Anomaly detection and localization in crowded scenes","author":"Li Weixin","year":"2013","unstructured":"Weixin Li, Vijay Mahadevan, and Nuno Vasconcelos. 2013. Anomaly detection and localization in crowded scenes. IEEE transactions on pattern analysis and machine intelligence 36, 1 (2013), 18\u201332."},{"key":"e_1_3_2_1_12_1","volume-title":"Simple open-vocabulary object detection with vision transformers. arxiv","author":"Minderer M","year":"2022","unstructured":"M Minderer, A Gritsenko, A Stone, M Neumann, D Weissenborn, A Dosovitskiy, A Mahendran, A Arnab, M Dehghani, Z Shen, 2022. Simple open-vocabulary object detection with vision transformers. arxiv 2022. arXiv preprint arXiv:2205.06230 2 (2022)."},{"key":"e_1_3_2_1_13_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_1_14_1","volume-title":"Actionclip: A new paradigm for video action recognition. arXiv preprint arXiv:2109.08472","author":"Wang Mengmeng","year":"2021","unstructured":"Mengmeng Wang, Jiazheng Xing, and Yong Liu. 2021. Actionclip: A new paradigm for video action recognition. arXiv preprint arXiv:2109.08472 (2021)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612490"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1142\/S0218001416550077"},{"key":"e_1_3_2_1_17_1","volume-title":"Cogvlm: Visual expert for pretrained language models. arXiv preprint arXiv:2311.03079","author":"Wang Weihan","year":"2023","unstructured":"Weihan Wang, Qingsong Lv, Wenmeng Yu, Wenyi Hong, Ji Qi, Yan Wang, Junhui Ji, Zhuoyi Yang, Lei Zhao, Xixuan Song, 2023. Cogvlm: Visual expert for pretrained language models. arXiv preprint arXiv:2311.03079 (2023)."},{"key":"e_1_3_2_1_18_1","volume-title":"Yi: Open foundation models by 01. ai. arXiv preprint arXiv:2403.04652","author":"Young Alex","year":"2024","unstructured":"Alex Young, Bei Chen, Chao Li, Chengen Huang, Ge Zhang, Guanwei Zhang, Heng Li, Jiangcheng Zhu, Jianqun Chen, Jing Chang, 2024. Yi: Open foundation models by 01. ai. arXiv preprint arXiv:2403.04652 (2024)."}],"event":{"name":"CNIOT 2024: 2024 5th International Conference on Computing, Networks and Internet of Things","acronym":"CNIOT 2024","location":"Tokyo Japan"},"container-title":["Proceedings of the 2024 5th International Conference on Computing, Networks and Internet of Things"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3670105.3670147","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3670105.3670147","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:50:28Z","timestamp":1755877828000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3670105.3670147"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,24]]},"references-count":18,"alternative-id":["10.1145\/3670105.3670147","10.1145\/3670105"],"URL":"https:\/\/doi.org\/10.1145\/3670105.3670147","relation":{},"subject":[],"published":{"date-parts":[[2024,5,24]]},"assertion":[{"value":"2024-07-29","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}