{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T13:04:36Z","timestamp":1780664676331,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,4,26]],"date-time":"2026-04-26T00:00:00Z","timestamp":1777161600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62132014"],"award-info":[{"award-number":["62132014"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Fundamental and Interdisciplinary Disciplines Breakthrough Plan of the Ministry of Education of China","award":["JYB2025XDXM113"],"award-info":[{"award-number":["JYB2025XDXM113"]}]},{"DOI":"10.13039\/501100003816","name":"Huawei Technologies","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003816","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3767295.3803572","type":"proceedings-article","created":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T20:20:04Z","timestamp":1777062004000},"page":"126-143","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["On-device Semantic Selection Made Low Latency and Memory Efficient with Monolithic Forwarding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-2023-8723","authenticated-orcid":false,"given":"Jiahao","family":"Zhou","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7315-1883","authenticated-orcid":false,"given":"Chengliang","family":"Lin","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3559-5467","authenticated-orcid":false,"given":"Dingji","family":"Li","sequence":"additional","affiliation":[{"name":"Huawei, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4839-055X","authenticated-orcid":false,"given":"Mingkai","family":"Dong","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9720-0361","authenticated-orcid":false,"given":"Haibo","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,4,26]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. ellamind\/wikipedia-2023-11-retrieval-multilingual-corpus \u2022 Datasets at HuggingFace. https:\/\/huggingface.co\/datasets\/ellamind\/wikipedia-2023-11-retrieval-multilingual-corpus"},{"key":"e_1_3_2_1_2_1","unstructured":"2024. BAAI\/bge-reranker-v2-gemma \u2022 HuggingFace. https:\/\/huggingface.co\/BAAI\/bge-reranker-v2-gemma."},{"key":"e_1_3_2_1_3_1","unstructured":"2024. BAAI\/bge-reranker-v2-m3 \u2022 HuggingFace. https:\/\/huggingface.co\/BAAI\/bge-reranker-v2-m3."},{"key":"e_1_3_2_1_4_1","unstructured":"2024. BAAI\/bge-reranker-v2-minicpm-layerwise \u2022 HuggingFace. https:\/\/huggingface.co\/BAAI\/bge-reranker-v2-minicpm-layerwise."},{"key":"e_1_3_2_1_5_1","unstructured":"2025. Global interpreter Lock - Python Wiki. https:\/\/wiki.python.org\/moin\/GlobalInterpreterLock"},{"key":"e_1_3_2_1_6_1","unstructured":"2025. IPADS-SAI\/MobiAgent: The Intelligent GUI agent for mobile Phones. https:\/\/github.com\/IPADS-SAI\/MobiAgent"},{"key":"e_1_3_2_1_7_1","unstructured":"2025. IPADS-SAI\/MobiMind-Decider-7B \u2022 HuggingFace. https:\/\/huggingface.co\/IPADS-SAI\/MobiMind-Decider-7B"},{"key":"e_1_3_2_1_8_1","unstructured":"2025. libuv\/libuv: Cross-platform asynchronous I\/O. https:\/\/github.com\/libuv\/libuv"},{"key":"e_1_3_2_1_9_1","unstructured":"2025. Magic Cue on Pixel 10 Series Phones: smart contextual assistance across apps emails & more. https:\/\/store.google.com\/intl\/en\/ideas\/articles\/magic-cue\/."},{"key":"e_1_3_2_1_10_1","volume-title":"Huawei HarmonyOS Developer. https:\/\/developer.huawei.com\/consumer\/cn\/doc\/harmonyos-guides\/performance-memory-usage [Online","author":"Usage Memory","year":"2025","unstructured":"2025. Memory Usage, Huawei HarmonyOS Developer. https:\/\/developer.huawei.com\/consumer\/cn\/doc\/harmonyos-guides\/performance-memory-usage [Online; accessed 2025-09-18]."},{"key":"e_1_3_2_1_11_1","unstructured":"2025. Qwen\/Qwen3-Reranker-8B \u2022 HuggingFace. https:\/\/huggingface.co\/Qwen\/Qwen3-Reranker-8B"},{"key":"e_1_3_2_1_12_1","unstructured":"2025. Rerank | Boost Enterprise Search and Retrieval | Cohere. https:\/\/cohere.com\/rerank."},{"key":"e_1_3_2_1_13_1","volume-title":"Reranking for Vertex AI RAG Engine | Generative AI on Vertex AI | Google Cloud. https:\/\/cloud.google.com\/vertex-ai\/generative-ai\/docs\/rag-engine\/retrieval-and-ranking [Online","year":"2025","unstructured":"2025. Reranking for Vertex AI RAG Engine | Generative AI on Vertex AI | Google Cloud. https:\/\/cloud.google.com\/vertex-ai\/generative-ai\/docs\/rag-engine\/retrieval-and-ranking [Online; accessed 2025-09-12]."},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 18th USENIX Conference on Operating Systems Design and Implementation","author":"Agrawal Amey","year":"2025","unstructured":"Amey Agrawal, Nitin Kedia, Ashish Panwar, Jayashree Mohan, Nipun Kwatra, Bhargav S. Gulavani, Alexey Tumanov, and Ramachandran Ramjee. 2025. Taming throughput-latency tradeoff in LLM inference with sarathi-serve. In Proceedings of the 18th USENIX Conference on Operating Systems Design and Implementation (Santa Clara, CA, USA) (OSDI'24). USENIX Association, USA, Article 7, 18 pages."},{"key":"e_1_3_2_1_15_1","unstructured":"Avinashsingh. 2025. [Feature]: Add support for Apple MPS(Metal Performance Shaders). https:\/\/github.com\/vllm-project\/vllm\/issues\/22629"},{"key":"e_1_3_2_1_16_1","volume-title":"LongBench v2: Towards Deeper Understanding and Reasoning on Realistic Long-context Multitasks. arXiv preprint arXiv:2412.15204","author":"Bai Yushi","year":"2024","unstructured":"Yushi Bai, Shangqing Tu, Jiajie Zhang, Hao Peng, Xiaozhi Wang, Xin Lv, Shulin Cao, Jiazheng Xu, Lei Hou, Yuxiao Dong, Jie Tang, and Juanzi Li. 2024. LongBench v2: Towards Deeper Understanding and Reasoning on Realistic Long-context Multitasks. arXiv preprint arXiv:2412.15204 (2024)."},{"key":"e_1_3_2_1_17_1","unstructured":"Beir-Cellar. 2025. beir-cellar\/beir: A Heterogeneous Benchmark for Information Retrieval. Easy to use evaluate your models across 15+ diverse IR datasets. https:\/\/github.com\/beir-cellar\/beir"},{"key":"e_1_3_2_1_18_1","unstructured":"Tom B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel M. Ziegler Jeffrey Wu Clemens Winter Christopher Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language Models are Few-Shot Learners. https:\/\/arxiv.org\/abs\/2005.14165. arXiv:2005.14165 [cs.CL]"},{"key":"e_1_3_2_1_19_1","volume-title":"EfficientQAT: Efficient Quantization-Aware Training for Large Language Models. arXiv preprint arXiv:2407.11062","author":"Chen Mengzhao","year":"2024","unstructured":"Mengzhao Chen, Wenqi Shao, Peng Xu, Jiahao Wang, Peng Gao, Kaipeng Zhang, Yu Qiao, and Ping Luo. 2024. EfficientQAT: Efficient Quantization-Aware Training for Large Language Models. arXiv preprint arXiv:2407.11062 (2024)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Prateek Chhikara Dev Khant Saket Aryan Taranjeet Singh and Deshraj Yadav. 2025. Mem0: Building Production-Ready AI Agents with Scalable Long-Term Memory. arXiv:2504.19413 [cs.CL] https:\/\/arxiv.org\/abs\/2504.19413","DOI":"10.3233\/FAIA251160"},{"key":"e_1_3_2_1_21_1","unstructured":"Cohere. 2024. Introducing Rerank 3 on Microsoft Azure AI | Cohere blog. https:\/\/cohere.com\/blog\/introducing-rerank-3-on-microsoft-azure-ai."},{"key":"e_1_3_2_1_22_1","unstructured":"Wikipedia contributors. 2022. Goodman and Kruskal's gamma. https:\/\/en.wikipedia.org\/wiki\/Goodman_and_Kruskal%27s_gamma"},{"key":"e_1_3_2_1_23_1","unstructured":"Wikipedia contributors. 2025. Coefficient of variation. https:\/\/en.wikipedia.org\/wiki\/Coefficient_of_variation"},{"key":"e_1_3_2_1_24_1","unstructured":"Wikipedia contributors. 2025. Global interpreter lock. https:\/\/en.wikipedia.org\/wiki\/Global_interpreter_lock"},{"key":"e_1_3_2_1_25_1","unstructured":"Mike Darling. 2025. 4 ways Pixel's Magic Cue can help you save time. https:\/\/blog.google\/products\/pixel\/google-pixel-magic-cue-ai-feature\/."},{"key":"e_1_3_2_1_26_1","unstructured":"Gabriel de Souza P. Moreira Ronay Ak Benedikt Schifferer Mengyao Xu Radek Osmulski and Even Oldridge. 2024. Enhancing Q&A Text Retrieval with Ranking Models: Benchmarking fine-tuning and deploying Rerankers for RAG. arXiv:2409.07691 [cs.IR] https:\/\/arxiv.org\/abs\/2409.07691"},{"key":"e_1_3_2_1_27_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. https:\/\/arxiv.org\/abs\/1810.04805. arXiv:1810.04805 [cs.CL]","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. https:\/\/arxiv.org\/abs\/1810.04805. arXiv:1810.04805 [cs.CL]"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3731569.3764834"},{"key":"e_1_3_2_1_29_1","volume-title":"GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers. https:\/\/arxiv.org\/abs\/2210.17323. arXiv:2210.17323 [cs.LG]","author":"Frantar Elias","year":"2023","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. 2023. GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers. https:\/\/arxiv.org\/abs\/2210.17323. arXiv:2210.17323 [cs.LG]"},{"key":"e_1_3_2_1_30_1","unstructured":"Qichen Fu Minsik Cho Thomas Merth Sachin Mehta Mohammad Rastegari and Mahyar Najibi.2024. LazyLLM: Dynamic Token Pruning for Efficient Long Context LLM Inference. arXiv:2407.14057 [cs.CL] https:\/\/arxiv.org\/abs\/2407.14057"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.194"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.2307\/2346830"},{"key":"e_1_3_2_1_33_1","unstructured":"HuggingFace. 2025. Accelerate: A simple way to launch train and use PyTorch models on almost any device and distributed configuration automatic mixed precision (including fp8) and easy-to-configure FSDP and DeepSpeed support. https:\/\/github.com\/huggingface\/accelerate"},{"key":"e_1_3_2_1_34_1","unstructured":"HuggingFace. 2025. Transformers: the model-definition framework for state-of-the-art machine learning models in text vision audio and multimodal models for both inference and training. https:\/\/github.com\/huggingface\/transformers"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1356"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.91"},{"key":"e_1_3_2_1_37_1","unstructured":"Jiazheng Kang Mingming Ji Zhe Zhao and Ting Bai. 2025. Memory OS of AI Agent. arXiv:2506.06326 [cs.AI] https:\/\/arxiv.org\/abs\/2506.06326"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_39_1","unstructured":"Khan-Yin. 2025. [Feature] Support Apple Silicon (M2\/M3...). https:\/\/github.com\/sgl-project\/sglang\/issues\/5767"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401075"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_42_1","unstructured":"Xunhao Lai Jianqiao Lu Yao Luo Yiyuan Ma and Xun Zhou. 2025. FlexPrefill: A Context-Aware Sparse Attention Mechanism for Efficient Long-Sequence Inference. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=OfjMllbelrT"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3496517"},{"key":"e_1_3_2_1_44_1","unstructured":"Jiaxing Li Chi Xu Lianchen Jia Feng Wang Cong Zhang and Jiangchuan Liu. 2025. EACO-RAG: Towards Distributed Tiered LLM Deployment using Edge-Assisted and Collaborative RAG with Adaptive Knowledge Update. arXiv:2410.20299 [cs.DC] https:\/\/arxiv.org\/abs\/2410.20299"},{"key":"e_1_3_2_1_45_1","volume-title":"AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. https:\/\/arxiv.org\/abs\/2306.00978. arXiv:2306.00978 [cs.CL]","author":"Lin Ji","year":"2024","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Wei-Ming Chen, Wei-Chen Wang, Guangxuan Xiao, Xingyu Dang, Chuang Gan, and Song Han. 2024. AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. https:\/\/arxiv.org\/abs\/2306.00978. arXiv:2306.00978 [cs.CL]"},{"key":"e_1_3_2_1_46_1","unstructured":"Lingkun Long Rubing Yang Yushi Huang Desheng Hui Ao Zhou and Jianlei Yang. 2025. SlimInfer: Accelerating Long-Context LLM Inference via Dynamic Token Pruning. arXiv:2508.06447 [cs.CL] https:\/\/arxiv.org\/abs\/2508.06447"},{"key":"e_1_3_2_1_47_1","volume-title":"LLM-Pruner: On the Structural Pruning of Large Language Models. In Thirty-seventh Conference on Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=J8Ajf9WfXP","author":"Ma Xinyin","year":"2023","unstructured":"Xinyin Ma, Gongfan Fang, and Xinchao Wang. 2023. LLM-Pruner: On the Structural Pruning of Large Language Models. In Thirty-seventh Conference on Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=J8Ajf9WfXP"},{"key":"e_1_3_2_1_48_1","unstructured":"Milvus. 2024. Milvus | The High-Performance Vector Database built for Scale. https:\/\/milvus.io\/."},{"key":"e_1_3_2_1_49_1","unstructured":"NVIDIA. 2025. NVIDIA Multi-Process Service. https:\/\/docs.nvidia.com\/deploy\/mps\/index.html"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Authors of BitNet b1.58. 2025. BitNet b1.58: 1.58-bit Large Language Models. arXiv:2504.12285 [cs.CL] https:\/\/arxiv.org\/abs\/2504.12285","DOI":"10.1017\/S0261444825000229"},{"key":"e_1_3_2_1_51_1","unstructured":"PyTorch. 2025. Multiprocessing package - torch.multiprocessing. https:\/\/docs.pytorch.org\/docs\/2.8\/multiprocessing.html"},{"key":"e_1_3_2_1_52_1","unstructured":"Yuwei Ren Yuhui Ding Lijun Wu Shujian Huang Lei Li and Qun Liu. 2024. BitNet a4.8: 1-bit Weight 4-bit Activation LLMs. arXiv:2411.04965 [cs.CL] https:\/\/arxiv.org\/abs\/2411.04965"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.225"},{"key":"e_1_3_2_1_54_1","unstructured":"Victor Sanh Lysandre Debut Julien Chaumond and Thomas Wolf. 2019. DistilBERT a distilled version of BERT: smaller faster cheaper and lighter. arXiv:1910.01108 [cs.CL] https:\/\/arxiv.org\/abs\/1910.01108"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.272"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Yixin Song Zeyu Mi Haotong Xie and Haibo Chen. 2024. PowerInfer: Fast Large Language Model Serving with a Consumer-grade GPU. arXiv:2312.12456 [cs.LG] https:\/\/arxiv.org\/abs\/2312.12456","DOI":"10.1145\/3694715.3695964"},{"key":"e_1_3_2_1_57_1","unstructured":"Nandan Thakur Nils Reimers Andreas R\u00fcckl\u00e9 Abhishek Srivastava and Iryna Gurevych. 2021. BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2). https:\/\/openreview.net\/forum?id=wCu6T5xFjeJ"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457550"},{"key":"e_1_3_2_1_60_1","volume-title":"Gonzalez","author":"Wang Yichuan","year":"2025","unstructured":"Yichuan Wang, Shu Liu, Zhifei Li, Yongji Wu, Ziming Mao, Yilong Zhao, Xiao Yan, Zhiying Xu, Yang Zhou, Ion Stoica, Sewon Min, Matei Zaharia, and Joseph E. Gonzalez. 2025. LEANN: A Low-Storage Vector Index. arXiv:2506.08276 [cs.DB] https:\/\/arxiv.org\/abs\/2506.08276"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657662"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-naacl.176"},{"key":"e_1_3_2_1_63_1","volume-title":"Agent Workflow Memory. In Forty-second International Conference on Machine Learning. https:\/\/openreview.net\/forum?id=NTAhi2JEEE","author":"Wang Zora Zhiruo","year":"2025","unstructured":"Zora Zhiruo Wang, Jiayuan Mao, Daniel Fried, and Graham Neubig. 2025. Agent Workflow Memory. In Forty-second International Conference on Machine Learning. https:\/\/openreview.net\/forum?id=NTAhi2JEEE"},{"key":"e_1_3_2_1_64_1","unstructured":"Orion Weller Michael Boratko Iftekhar Naim and Jinhyuk Lee. 2025. On the Theoretical Limitations of Embedding-Based Retrieval. arXiv:2508.21038 [cs.IR] https:\/\/arxiv.org\/abs\/2508.21038"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-industry.11"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"crossref","unstructured":"Ji Xin Raphael Tang Jaejun Lee Yaoliang Yu and Jimmy Lin. 2020. DeeBERT: Dynamic Early Exiting for Accelerating BERT Inference. arXiv:2004.12993 [cs.CL] https:\/\/arxiv.org\/abs\/2004.12993","DOI":"10.18653\/v1\/2020.acl-main.204"},{"key":"e_1_3_2_1_67_1","unstructured":"Zhenliang Xue Yixin Song Zeyu Mi Xinrui Zheng Yubin Xia and Haibo Chen. 2024. PowerInfer-2: Fast Large Language Model Inference on a Smartphone. arXiv:2406.06282 [cs.LG] https:\/\/arxiv.org\/abs\/2406.06282"},{"key":"e_1_3_2_1_68_1","unstructured":"Cheng Zhang Erhu Feng Xi Zhao Yisheng Zhao Wangbo Gong Jiahui Sun Dong Du Zhichao Hua Yubin Xia and Haibo Chen. 2025. MobiAgent: A Systematic Framework for Customizable Mobile Agents. arXiv:2509.00531 [cs.MA] https:\/\/arxiv.org\/abs\/2509.00531"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2412.03131"},{"key":"e_1_3_2_1_70_1","unstructured":"Yanzhao Zhang Mingxin Li Dingkun Long Xin Zhang Huan Lin Baosong Yang Pengjun Xie An Yang Dayiheng Liu Junyang Lin Fei Huang and Jingren Zhou. 2025. Qwen3 Embedding: Advancing Text Embedding and Reranking Through Foundation Models. https:\/\/arxiv.org\/abs\/2506.05176. arXiv:2506.05176 [cs.CL]"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637870"},{"key":"e_1_3_2_1_72_1","unstructured":"George Kingsley Zipf. 1949. Human behavior and the principle of least effort. (1949)."}],"event":{"name":"EUROSYS '26: 21st European Conference on Computer Systems","location":"McEwan Hall\/The University of Edinburgh Edinburgh Scotland UK","acronym":"EUROSYS '26","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 21st European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3767295.3803572","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T12:13:39Z","timestamp":1780661619000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3767295.3803572"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,26]]},"references-count":72,"alternative-id":["10.1145\/3767295.3803572","10.1145\/3767295"],"URL":"https:\/\/doi.org\/10.1145\/3767295.3803572","relation":{},"subject":[],"published":{"date-parts":[[2026,4,26]]},"assertion":[{"value":"2026-04-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}