{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T01:59:18Z","timestamp":1772589558303,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":71,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"The Open Research Project Programme of the State Key Laboratory of Internet of Things for Smart City(University of Macau)(Ref. No.:SKL-IoTSC(UM)-2021-2023\/ORP\/GA05\/2022)"},{"name":"The FDCT grants 0154\/2022\/A3"},{"name":"The Major basic research project of Shandong Natural Science Foundation","award":["No. ZR2021ZD15"],"award-info":[{"award-number":["No. ZR2021ZD15"]}]},{"name":"The MYRG-CRG2022-00013-IOTSC-ICI grant"},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. 62106128,No. 62176139"],"award-info":[{"award-number":["No. 62106128,No. 62176139"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Foundation of Shandong Province","award":["No. ZR2021QF001"],"award-info":[{"award-number":["No. ZR2021QF001"]}]},{"name":"The Young Elite Scientists Sponsorship Program by CAST","award":["No. 2021QNRC001"],"award-info":[{"award-number":["No. 2021QNRC001"]}]},{"name":"SKL-IOTSC(UM)-2021-2023"},{"name":"The Open project of Key Laboratory of Artificial Intelligence, Ministry of Education"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611978","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"105-115","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["Clip Fusion with Bi-level Optimization for Human Mesh Reconstruction from Monocular Videos"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1137-0720","authenticated-orcid":false,"given":"Peng","family":"Wu","sequence":"first","affiliation":[{"name":"Shandong University, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9543-6960","authenticated-orcid":false,"given":"Xiankai","family":"Lu","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2656-3082","authenticated-orcid":false,"given":"Jianbing","family":"Shen","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8465-1294","authenticated-orcid":false,"given":"Yilong","family":"Yin","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00542"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00351"},{"key":"e_1_3_2_2_3_1","volume-title":"Delta-stn: Efficient bilevel optimization for neural networks using structured response jacobians. In Advances in Neural Information Processing Systems. 21725--21737.","author":"Bae Juhan","year":"2020","unstructured":"Juhan Bae and Roger B Grosse. 2020. Delta-stn: Efficient bilevel optimization for neural networks using structured response jacobians. In Advances in Neural Information Processing Systems. 21725--21737."},{"key":"e_1_3_2_2_4_1","volume-title":"Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473","author":"Bahdanau Dzmitry","year":"2014","unstructured":"Dzmitry Bahdanau, Kyunghyun Cho, and Yoshua Bengio. 2014. Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_34"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_2_7_1","volume-title":"Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio.","author":"Cho Kyunghyun","year":"2014","unstructured":"Kyunghyun Cho, Bart Van Merri\u00ebnboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014. Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00200"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_45"},{"key":"e_1_3_2_2_10_1","unstructured":"Carl Doersch and Andrew Zisserman. 2019. Sim2real transfer learning for 3d human pose estimation: motion to the rescue. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_13"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01106"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00214"},{"key":"e_1_3_2_2_14_1","volume-title":"Humans in 4D: Reconstructing and Tracking Humans with Transformers. arXiv preprint arXiv:2305.20091","author":"Goel Shubham","year":"2023","unstructured":"Shubham Goel, Georgios Pavlakos, Jathushan Rajasegaran, Angjoo Kanazawa, and Jitendra Malik. 2023. Humans in 4D: Reconstructing and Tracking Humans with Transformers. arXiv preprint arXiv:2305.20091 (2023)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00739"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00633"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3194167"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01033"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00055"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126500"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.248"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00015"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00744"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00576"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00530"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01094"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00234"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00463"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01140"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01215"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20065-6_28"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00607"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20065-6_34"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00071"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00199"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01270"},{"key":"e_1_3_2_2_38_1","volume-title":"Darts: Differentiable architecture search. arXiv preprint arXiv:1806.09055","author":"Liu Hanxiao","year":"2018","unstructured":"Hanxiao Liu, Karen Simonyan, and Yiming Yang. 2018. Darts: Differentiable architecture search. arXiv preprint arXiv:1806.09055 (2018)."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3132674"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00816"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2661229.2661273"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2816795.2818013"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3040258"},{"key":"e_1_3_2_2_44_1","volume-title":"Segmenting objects from relational visual data","author":"Lu Xiankai","year":"2021","unstructured":"Xiankai Lu, Wenguan Wang, Jianbing Shen, David J Crandall, and Luc Van Gool. 2021. Segmenting objects from relational visual data. IEEE transactions on pattern analysis and machine intelligence, Vol. 44, 11 (2021), 7885--7897."},{"key":"e_1_3_2_2_45_1","volume-title":"Proceedings of the Asian Conference on Computer Vision.","author":"Luo Zhengyi","year":"2020","unstructured":"Zhengyi Luo, S Alireza Golestaneh, and Kris M Kitani. 2020. 3d human motion estimation via motion compression and refinement. In Proceedings of the Asian Conference on Computer Vision."},{"key":"e_1_3_2_2_46_1","volume-title":"Self-tuning networks: Bilevel optimization of hyperparameters using structured best-response functions. arXiv preprint arXiv:1903.03088","author":"MacKay Matthew","year":"2019","unstructured":"Matthew MacKay, Paul Vicol, Jon Lorraine, David Duvenaud, and Roger Grosse. 2019. Self-tuning networks: Bilevel optimization of hyperparameters using structured best-response functions. arXiv preprint arXiv:1903.03088 (2019)."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00554"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00064"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_44"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01123"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00154"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2023.123456"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.19"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2017.2712906"},{"key":"e_1_3_2_2_55_1","unstructured":"Heinrich von Stackelberg et al. 1952. Theory of the market economy. (1952)."},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00545"},{"key":"e_1_3_2_2_57_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_37"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01279"},{"key":"e_1_3_2_2_60_1","volume-title":"A bi-level framework for learning to solve combinatorial optimization on graphs. Advances in Neural Information Processing Systems","author":"Wang Runzhong","year":"2021","unstructured":"Runzhong Wang, Zhigang Hua, Gan Liu, Jiayi Zhang, Junchi Yan, Feng Qi, Shuang Yang, Jun Zhou, and Xiaokang Yang. 2021. A bi-level framework for learning to solve combinatorial optimization on graphs. Advances in Neural Information Processing Systems (2021), 21453--21466."},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01286"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01457"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00850"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01425"},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01273"},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00181"},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01125"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01288"},{"key":"e_1_3_2_2_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.280"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547844"},{"key":"e_1_3_2_2_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00153"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611978","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611978","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:10:23Z","timestamp":1755821423000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611978"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":71,"alternative-id":["10.1145\/3581783.3611978","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611978","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}