{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T22:18:52Z","timestamp":1766269132985,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592246","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"617-621","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["TNOD: Transformer Network with Object Detection for Tag Recommendation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-2721-1611","authenticated-orcid":false,"given":"Kai","family":"Feng","sequence":"first","affiliation":[{"name":"Alibaba Group, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8875-6711","authenticated-orcid":false,"given":"Tao","family":"Liu","sequence":"additional","affiliation":[{"name":"Alibaba Group, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1356-2905","authenticated-orcid":false,"given":"Heng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Alibaba Group, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6572-6175","authenticated-orcid":false,"given":"Zihao","family":"Meng","sequence":"additional","affiliation":[{"name":"Alibaba Group, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1651-9591","authenticated-orcid":false,"given":"Zemin","family":"Miao","sequence":"additional","affiliation":[{"name":"Alibaba Group, China"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"VATT: Transformers for multimodal self-supervised learning from raw video, audio and text. In Advances in Neural Information Processing Systems, Vol.\u00a034. Curran Associates","author":"Akbari Hassan","year":"2021","unstructured":"Hassan Akbari, Liangzhe Yuan, Rui Qian, Wei-Hong Chuang, Shih-Fu Chang, Yin Cui, and Boqing Gong. 2021. VATT: Transformers for multimodal self-supervised learning from raw video, audio and text. In Advances in Neural Information Processing Systems, Vol.\u00a034. Curran Associates, Inc., 24206\u201324221."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2020.2988983"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3390\/fi13050129"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466876"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3074599"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.06.056"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380182"},{"key":"e_1_3_2_1_10_1","unstructured":"Douwe Kiela Suvrat Bhooshan Hamed Firooz Ethan Perez and Davide Testuggine. 2020. Supervised multimodal bitransformers for classifying images and text. http:\/\/arxiv.org\/abs\/1909.02950"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-020-01515-7"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.01.091"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","unstructured":"Chuyi Li Lulu Li Hongliang Jiang Kaiheng Weng Yifei Geng Liang Li Zaidan Ke Qingyuan Li Meng Cheng Weiqiang Nie Yiduo Li Bo Zhang Yufei Liang Linyuan Zhou Xiaoming Xu Xiangxiang Chu Xiaoming Wei and Xiaolin Wei. 2022. YOLOv6: A single-stage object detection framework for industrial applications. https:\/\/doi.org\/10.48550\/arXiv.2209.02976","DOI":"10.48550\/arXiv.2209.02976"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357912"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2019.2932406"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISM52913.2021.00035"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2992941"},{"volume-title":"Advances in Neural Information Processing Systems (Long Beach, California, USA) (NIPS\u201917, Vol.\u00a030). Curran Associates","author":"Vaswani Ashish","key":"e_1_3_2_1_18_1","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information Processing Systems (Long Beach, California, USA) (NIPS\u201917, Vol.\u00a030). Curran Associates, Inc., 6000\u20136010."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350858"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2989473"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/3172077.3172367"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015805"}],"event":{"name":"ICMR '23: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Thessaloniki Greece","acronym":"ICMR '23"},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592246","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592246","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:51:22Z","timestamp":1750182682000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592246"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":22,"alternative-id":["10.1145\/3591106.3592246","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592246","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}