{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:10:08Z","timestamp":1755821408167,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611814","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"6918-6927","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["SimHMR: A Simple Query-based Framework for Parameterized Human Mesh Reconstruction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8804-191X","authenticated-orcid":false,"given":"Zihao","family":"Huang","sequence":"first","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8209-9021","authenticated-orcid":false,"given":"Min","family":"Shi","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4050-6324","authenticated-orcid":false,"given":"Chengxin","family":"Liu","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0884-5126","authenticated-orcid":false,"given":"Ke","family":"Xian","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9223-1863","authenticated-orcid":false,"given":"Zhiguo","family":"Cao","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.471"},{"key":"e_1_3_2_1_2_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E. Hinton","author":"Ba Lei Jimmy","year":"2016","unstructured":"Lei Jimmy Ba, Jamie Ryan Kiros, and Geoffrey E. Hinton. 2016. Layer Normalization. arXiv Comput. Res. Repository, Vol. abs\/1607.06450 (2016)."},{"key":"e_1_3_2_1_3_1","volume-title":"Proc. Int. Conf. Mach. Learn. (Proceedings of Machine Learning Research","volume":"824","author":"Bertasius Gedas","year":"2021","unstructured":"Gedas Bertasius, Heng Wang, and Lorenzo Torresani. 2021. Is Space-Time Attention All You Need for Video Understanding?. In Proc. Int. Conf. Mach. Learn. (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 813--824. http:\/\/proceedings.mlr.press\/v139\/bertasius21a.html"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_34"},{"key":"e_1_3_2_1_5_1","unstructured":"Sebastian Borgeaud Arthur Mensch Jordan Hoffmann Trevor Cai Eliza Rutherford Katie Millican George van den Driessche Jean-Baptiste Lespiau Bogdan Damoc Aidan Clark Diego de Las Casas Aurelia Guy Jacob Menick Roman Ring Tom Hennigan Saffron Huang Loren Maggiore Chris Jones Albin Cassirer Andy Brock Michela Paganini Geoffrey Irving Oriol Vinyals Simon Osindero Karen Simonyan Jack W. Rae Erich Elsen and Laurent Sifre. 2022. Improving Language Models by Retrieving from Trillions of Tokens. In Proc. Int. Conf. Mach. Learn. (Proceedings of Machine Learning Research Vol. 162) Kamalika Chaudhuri Stefanie Jegelka Le Song Csaba Szepesv\u00e1ri Gang Niu and Sivan Sabato (Eds.). PMLR 2206--2240. https:\/\/proceedings.mlr.press\/v162\/borgeaud22a.html"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2929257"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19769-7_20"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_45"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00153"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58607-2_2"},{"key":"e_1_3_2_1_12_1","unstructured":"MMHuman3D Contributors. 2021. OpenMMLab 3D Human Parametric Model Toolbox and Benchmark. https:\/\/github.com\/open-mmlab\/mmhuman3d."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p19-1285"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2212.14052"},{"key":"e_1_3_2_1_15_1","volume-title":"Proc. Int. Conf. Learn. Repr. OpenReview.net. https:\/\/openreview.net\/forum?id=YicbFdNTTy","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In Proc. Int. Conf. Learn. Repr. OpenReview.net. https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58520-4_45"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF02291478"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-021-0229-5"},{"key":"e_1_3_2_1_19_1","volume-title":"Point Cloud Learning with Transformer. arXiv Comput. Res. Repository","author":"Han Xian-Feng","year":"2021","unstructured":"Xian-Feng Han, Yu-Jia Kuang, and Guo-Qiang Xiao. 2021. Point Cloud Learning with Transformer. arXiv Comput. Res. Repository, Vol. abs\/2104.13636 (2021). [arXiv]2104.13636 https:\/\/arxiv.org\/abs\/2104.13636"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.248"},{"key":"e_1_3_2_1_22_1","volume-title":"Proc. Adv. Neural Inf. Process. Syst., Marc'Aurelio Ranzato, Alina Beygelzimer, Yann N. Dauphin, Percy Liang, and Jennifer Wortman Vaughan (Eds.). 14745--14758","author":"Jiang Yifan","year":"2021","unstructured":"Yifan Jiang, Shiyu Chang, and Zhangyang Wang. 2021. TransGAN: Two Pure Transformers Can Make One Strong GAN, and That Can Scale Up. In Proc. Adv. Neural Inf. Process. Syst., Marc'Aurelio Ranzato, Alina Beygelzimer, Yann N. Dauphin, Percy Liang, and Jennifer Wortman Vaughan (Eds.). 14745--14758. https:\/\/proceedings.neurips.cc\/paper\/2021\/hash\/7c220a2091c26a7f5e9f1cfb099511e3-Abstract.html"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.24.12"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00015"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00744"},{"volume-title":"Proc. Int. Conf. Learn. Repr.","author":"Diederik","key":"e_1_3_2_1_26_1","unstructured":"Diederik P. Kingma and Jimmy Ba. 2014. Adam: A Method for Stochastic Optimization. In Proc. Int. Conf. Learn. Repr."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00530"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01094"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00234"},{"key":"e_1_3_2_1_30_1","volume-title":"From Zero to Hero: On the Limitations of Zero-Shot Cross-Lingual Transfer with Multilingual Transformers. arXiv Comput. Res. Repository","author":"Lauscher Anne","year":"2020","unstructured":"Anne Lauscher, Vinit Ravishankar, Ivan Vulic, and Goran Glavas. 2020. From Zero to Hero: On the Limitations of Zero-Shot Cross-Lingual Transfer with Multilingual Transformers. arXiv Comput. Res. Repository, Vol. abs\/2005.00633 (2020). [arXiv]2005.00633 https:\/\/arxiv.org\/abs\/2005.00633"},{"key":"e_1_3_2_1_31_1","volume-title":"Proc. Int. Conf. Learn. Repr. OpenReview.net. https:\/\/openreview.net\/forum?id=dwg5rXg1WS_","author":"Lee Kwonjoon","year":"2022","unstructured":"Kwonjoon Lee, Huiwen Chang, Lu Jiang, Han Zhang, Zhuowen Tu, and Ce Liu. 2022. ViTGAN: Training GANs with Vision Transformers. In Proc. Int. Conf. Learn. Repr. OpenReview.net. https:\/\/openreview.net\/forum?id=dwg5rXg1WS_"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00198"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20065-6_34"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00199"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01270"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2816795.2818013"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/37401.37422"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00064"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_44"},{"key":"e_1_3_2_1_41_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv Comput. Res. Repository Vol. abs\/2303.08774 (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.08774 [arXiv]2303.08774"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_36"},{"key":"e_1_3_2_1_43_1","volume-title":"High-Performance Deep Learning Library. In Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Proc. Adv. Neural Inf. Process. Syst., Vol. 32."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01123"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00055"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.617"},{"key":"e_1_3_2_1_47_1","volume-title":"TokenLearner: What Can 8 Learned Tokens Do for Images and Videos? arXiv Comput. Res. Repository","author":"Ryoo Michael S.","year":"2021","unstructured":"Michael S. Ryoo, A. J. Piergiovanni, Anurag Arnab, Mostafa Dehghani, and Anelia Angelova. 2021. TokenLearner: What Can 8 Learned Tokens Do for Images and Videos? arXiv Comput. Res. Repository, Vol. abs\/2106.11297 (2021). showeprint[arXiv]2106.11297 https:\/\/arxiv.org\/abs\/2106.11297"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00239"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00016"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_44"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01099"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00545"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_2"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_37"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01180"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01294"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00622"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548133"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01273"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01125"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"crossref","unstructured":"Ce Zheng Matias Mendieta Taojiannan Yang and Chen Chen. 2022. HeatER: An Efficient and Unified Network for Human Reconstruction via Heatmap-based TransformER.","DOI":"10.1109\/CVPR52729.2023.01340"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Ottawa ON Canada","acronym":"MM '23"},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611814","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611814","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T23:55:59Z","timestamp":1755820559000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611814"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":62,"alternative-id":["10.1145\/3581783.3611814","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611814","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}