{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:14:33Z","timestamp":1765340073188,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","funder":[{"DOI":"10.13039\/100007219","name":"Natural Science Foundation of Shanghai","doi-asserted-by":"publisher","award":["25ZR1401016"],"award-info":[{"award-number":["25ZR1401016"]}],"id":[{"id":"10.13039\/100007219","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755299","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:26:51Z","timestamp":1761377211000},"page":"11825-11833","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["A Satellite-Ground Synergistic Large Vision-Language Model System for Earth Observation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7349-3780","authenticated-orcid":false,"given":"Yuxin","family":"Zhang","sequence":"first","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6628-5483","authenticated-orcid":false,"given":"Jiahao","family":"Yang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3215-2696","authenticated-orcid":false,"given":"Zhe","family":"Chen","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China and SpaceAIC Co., Ltd., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2420-2774","authenticated-orcid":false,"given":"Wenjun","family":"Zhu","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China and SpaceAIC Co., Ltd., Shanghai, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9807-2648","authenticated-orcid":false,"given":"Jin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6502-9910","authenticated-orcid":false,"given":"Yue","family":"Gao","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. ''Small Satellites and Big Antennas''. Available: https:\/\/www.raspberrypi.com\/news\/small-satellites-and-big-antennas."},{"key":"e_1_3_2_1_2_1","unstructured":"2024. ''NORAD GP Element Sets''. Available: https:\/\/celestrak.org\/NORAD\/elements\/."},{"key":"e_1_3_2_1_3_1","unstructured":"2024. ''Planet''. Available: https:\/\/www.planet.com\/."},{"key":"e_1_3_2_1_4_1","unstructured":"2024. ''Planet Labs PBC Announces Real-Time Insights Technology Using NVIDIA Jetson Platform''. Available: https:\/\/www.businesswire.com\/news\/home\/20240610385569\/en\/Planet-Labs-PBC-Announces-Real-Time-Insights-Technology-Using-NVIDIA-Jetson-Platform."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.001.2100795"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2015.18"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.3390\/rs12244184"},{"key":"e_1_3_2_1_8_1","volume-title":"Petals: Collaborative Inference and Fine-tuning of Large Models. arXiv preprint arXiv:2209.01188","author":"Borzunov Alexander","year":"2022","unstructured":"Alexander Borzunov, Dmitry Baranchuk, Tim Dettmers, Max Ryabinin, Younes Belkada, Artem Chumachenko, Pavel Samygin, and Colin Raffel. 2022. Petals: Collaborative Inference and Fine-tuning of Large Models. 
arXiv preprint arXiv:2209.01188 (2022)."},{"key":"e_1_3_2_1_9_1","first-page":"1865","article-title":"Remote Sensing Image Scene Classification: Benchmark and State of the Art","volume":"105","author":"Cheng Gong","year":"2017","unstructured":"Gong Cheng, Junwei Han, and Xiaoqiang Lu. 2017. Remote Sensing Image Scene Classification: Benchmark and State of the Art. Proc. IEEE 105, 10 (2017), 1865-1883.","journal-title":"Proc. IEEE"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378473"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.3390\/rs13163209"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680712"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.scitotenv.2020.140033"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2018.2802438"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2024.3415661"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3362475"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3685647"},{"key":"e_1_3_2_1_18_1","volume-title":"RSGPT: A Remote Sensing Vision Language Model and Benchmark. arXiv preprint arXiv:2307.15266","author":"Hu Yuan","year":"2023","unstructured":"Yuan Hu, Jianlong Yuan, Congcong Wen, Xiaonan Lu, and Xiang Li. 2023. RSGPT: A Remote Sensing Vision Language Model and Benchmark. arXiv preprint arXiv:2307.15266 (2023)."},{"key":"e_1_3_2_1_19_1","volume-title":"Scaling Laws for Neural Language Models. arXiv preprint arXiv:2001.08361","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, Tom Henighan, Tom B Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, and Dario Amodei. 2020. Scaling Laws for Neural Language Models. arXiv preprint arXiv:2001.08361 (2020)."},{"key":"e_1_3_2_1_20_1","volume-title":"Proc. of the 37th IEEE\/CVF CVPR. 
27831-27840","author":"Kuckreja Kartik","year":"2024","unstructured":"Kartik Kuckreja, Muhammad Sohail Danish, Muzammal Naseer, Abhijit Das, Salman Khan, and Fahad Shahbaz Khan. 2024. GeoChat: Grounded Large Vision-Language Model for Remote Sensing. In Proc. of the 37th IEEE\/CVF CVPR. 27831-27840."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01540"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3570361.3592519"},{"key":"e_1_3_2_1_23_1","volume-title":"FedSN: A Federated Learning Framework over Heterogeneous LEO Satellite Networks","author":"Lin Zheng","year":"2024","unstructured":"Zheng Lin, Zhe Chen, Zihan Fang, Xianhao Chen, Xiong Wang, and Yue Gao. 2024. FedSN: A Federated Learning Framework over Heterogeneous LEO Satellite Networks. IEEE Trans. Mobile Comput. (2024)."},{"key":"e_1_3_2_1_24_1","volume-title":"AdaptSFL: Adaptive Split Federated Learning in Resource-Constrained Edge Networks","author":"Lin Zheng","year":"2025","unstructured":"Zheng Lin, Guanqiao Qu, Wei Wei, Xianhao Chen, and Kin K. Leung. 2025. AdaptSFL: Adaptive Split Federated Learning in Resource-Constrained Edge Networks. IEEE\/ACM Trans. Netw. (2025)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2024.3359040"},{"key":"e_1_3_2_1_26_1","volume-title":"RemoteCLIP: A Vision Language Foundation Model for Remote Sensing","author":"Liu Fan","year":"2024","unstructured":"Fan Liu, Delong Chen, Zhangqingyun Guan, Xiaocong Zhou, Jiale Zhu, Qiaolin Ye, Liyong Fu, and Jun Zhou. 2024. RemoteCLIP: A Vision Language Foundation Model for Remote Sensing. IEEE Trans. Geosci. Remote Sens. (2024)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3686138.3686140"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.2988782"},{"key":"e_1_3_2_1_29_1","unstructured":"Jacob Manning David Langerman Barath Ramesh Evan Gretok Christopher Wilson Alan George James MacKinnon and Gary Crum. 2018. 
Machine-Learning Space Applications on SmallSat Platforms with TensorFlow. (2018)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00676"},{"key":"e_1_3_2_1_31_1","volume-title":"Proc. of the 12th NSDI. 117-130","author":"Pfaff Ben","year":"2015","unstructured":"Ben Pfaff, Justin Pettit, Teemu Koponen, Ethan Jackson, Andy Zhou, Jarno Rajahalme, Jesse Gross, Alex Wang, Joe Stringer, Pravin Shelar, et al. 2015. The Design and Implementation of Open vSwitch. In Proc. of the 12th NSDI. 117-130."},{"key":"e_1_3_2_1_32_1","volume-title":"Proc. of the 38th ICML. 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning Transferable Visual Models From Natural Language Supervision. In Proc. of the 38th ICML. 8748-8763."},{"key":"e_1_3_2_1_33_1","volume-title":"Proc. of the 38th ICML. 8821-8831","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation. In Proc. of the 38th ICML. 8821-8831."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-021-24638-z"},{"key":"e_1_3_2_1_35_1","volume-title":"S4: Self-Supervised Sensing Across the Spectrum. arXiv preprint arXiv:2405.01656","author":"Shenoy Jayanth","year":"2024","unstructured":"Jayanth Shenoy, Xinjian Davis Zhang, Shlok Mehrotra, Bill Tao, Rem Yang, Han Zhao, and Deepak Vasisht. 2024. S4: Self-Supervised Sensing Across the Spectrum. arXiv preprint arXiv:2405.01656 (2024)."},{"key":"e_1_3_2_1_36_1","volume-title":"Proc. of the 21st NSDI. 825-840","author":"Singh Vaibhav","year":"2024","unstructured":"Vaibhav Singh, Tusher Chakraborty, Suraj Jog, Om Chabra, Deepak Vasisht, and Ranveer Chandra. 2024. 
Spectrumize: Spectrum-Efficient Satellite Networks for the Internet of Things. In Proc. of the 21st NSDI. 825-840."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00085"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3570361.3592521"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3452296.3472932"},{"key":"e_1_3_2_1_40_1","unstructured":"Peng Wang Shuai Bai Sinan Tan Shijie Wang Zhihao Fan Jinze Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge et al. 2024. Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at Any Resolution. arXiv preprint arXiv:2409.12191 (2024)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3552326.3587438"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680779"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672268"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00418"},{"key":"e_1_3_2_1_45_1","volume-title":"LVLM-EHub: A Comprehensive Evaluation Benchmark for Large Vision-Language Models","author":"Xu Peng","year":"2024","unstructured":"Peng Xu, Wenqi Shao, Kaipeng Zhang, Peng Gao, Shuo Liu, Meng Lei, Fanqing Meng, Siyuan Huang, Yu Qiao, and Ping Luo. 2024. LVLM-EHub: A Comprehensive Evaluation Benchmark for Large Vision-Language Models. IEEE Trans. Pattern Anal. Mach. Intell. (2024), 1-18."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1186\/s43020-019-0006-0"},{"key":"e_1_3_2_1_47_1","first-page":"6866","article-title":"Edge-Cloud Polarization and Collaboration: A Comprehensive Survey for AI","volume":"35","author":"Yao Jiangchao","year":"2022","unstructured":"Jiangchao Yao, Shengyu Zhang, Yang Yao, Feng Wang, Jianxin Ma, Jianwei Zhang, Yunfei Chu, Luo Ji, Kunyang Jia, Tao Shen, et al. 2022. Edge-Cloud Polarization and Collaboration: A Comprehensive Survey for AI. IEEE Trans. Knowl. Data Eng. 
35, 7 (2022), 6866-6886.","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"e_1_3_2_1_48_1","volume-title":"SkyEyeGPT: Unifying Remote Sensing Vision-Language Tasks via Instruction Tuning with Large Language Model. arXiv preprint arXiv:2401.09712","author":"Zhan Yang","year":"2024","unstructured":"Yang Zhan, Zhitong Xiong, and Yuan Yuan. 2024. SkyEyeGPT: Unifying Remote Sensing Vision-Language Tasks via Instruction Tuning with Large Language Model. arXiv preprint arXiv:2401.09712 (2024)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3369699"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3510781"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3685510"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3336471"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.3390\/rs15041156"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680604"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on 
Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755299","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:10:16Z","timestamp":1765339816000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755299"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":54,"alternative-id":["10.1145\/3746027.3755299","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755299","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}