{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T07:12:09Z","timestamp":1779174729709,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":138,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Guangdong Basic and Applied Basic Research Foundation","award":["2022A1515010120"],"award-info":[{"award-number":["2022A1515010120"]}]},{"name":"China NSFC","award":["62202313,62225202"],"award-info":[{"award-number":["62202313,62225202"]}]},{"name":"Longhua Science and Technology Innovation Bureau","award":["10162A20220720B12AB12"],"award-info":[{"award-number":["10162A20220720B12AB12"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3671873","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:55:12Z","timestamp":1724561712000},"page":"3690-3701","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Efficient Mixture of Experts based on Large Language Models for Low-Resource Data Preprocessing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8249-9695","authenticated-orcid":false,"given":"Mengyi","family":"Yan","sequence":"first","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5760-5145","authenticated-orcid":false,"given":"Yaoshu","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Computing Sciences, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4086-1421","authenticated-orcid":false,"given":"Kehan","family":"Pang","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2356-782X","authenticated-orcid":false,"given":"Min","family":"Xie","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Computing Sciences, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5152-0055","authenticated-orcid":false,"given":"Jianxin","family":"Li","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_2_1_1","first-page":"4102","article-title":"Review of data preprocessing techniques in data mining","volume":"12","author":"Alasadi Suad A","year":"2017","unstructured":"Suad A Alasadi and Wesam S Bhaya. 2017. Review of data preprocessing techniques in data mining. Journal of Engineering and Applied Sciences, Vol. 12, 16 (2017), 4102--4107.","journal-title":"Journal of Engineering and Applied Sciences"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476300"},{"key":"e_1_3_2_2_3_1","unstructured":"Xiao Bi Deli Chen Guanting Chen Shanhuang Chen Damai Dai Chengqi Deng Honghui Ding Kai Dong Qiushi Du Zhe Fu et al. 2024. DeepSeek LLM: Scaling Open-Source Language Models with Longtermism. arXiv preprint arXiv:2401.02954 (2024)."},{"key":"e_1_3_2_2_4_1","volume-title":"Adaptive Blocking: Learning to Scale Up Record Linkage. In ICDM. 87--96.","author":"Bilenko Mikhail","year":"2006","unstructured":"Mikhail Bilenko, Beena Kamath, and Raymond J Mooney. 2006. Adaptive Blocking: Learning to Scale Up Record Linkage. In ICDM. 87--96."},{"key":"e_1_3_2_2_5_1","volume-title":"Product Attribute Value Extraction using Large Language Models. arXiv preprint arXiv:2310.12537","author":"Brinkmann Alexander","year":"2023","unstructured":"Alexander Brinkmann, Roee Shraga, and Christian Bizer. 2023. Product Attribute Value Extraction using Large Language Models. arXiv preprint arXiv:2310.12537 (2023)."},{"key":"e_1_3_2_2_6_1","volume-title":"Multitask learning. Machine learning","author":"Caruana Rich","year":"1997","unstructured":"Rich Caruana. 1997. Multitask learning. Machine learning, Vol. 28 (1997), 41--75."},{"key":"e_1_3_2_2_7_1","volume-title":"Punica: Multi-tenant lora serving. arXiv preprint arXiv:2310.18547","author":"Chen Lequn","year":"2023","unstructured":"Lequn Chen, Zihao Ye, Yongji Wu, Danyang Zhuo, Luis Ceze, and Arvind Krishnamurthy. 2023. Punica: Multi-tenant lora serving. arXiv preprint arXiv:2310.18547 (2023)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455961"},{"key":"e_1_3_2_2_9_1","volume-title":"SEED: Domain-Specific Data Curation With Large Language Models. arXiv e-prints","author":"Chen Zui","year":"2023","unstructured":"Zui Chen, Lei Cao, Sam Madden, Tim Kraska, Zeyuan Shang, Ju Fan, Nan Tang, Zihui Gu, Chunwei Liu, and Michael Cafarella. 2023. SEED: Domain-Specific Data Curation With Large Language Models. arXiv e-prints (2023), arXiv--2310."},{"key":"e_1_3_2_2_10_1","volume-title":"Towards understanding mixture of experts in deep learning. arXiv preprint arXiv:2208.02813","author":"Chen Zixiang","year":"2022","unstructured":"Zixiang Chen, Yihe Deng, Yue Wu, Quanquan Gu, and Yuanzhi Li. 2022. Towards understanding mixture of experts in deep learning. arXiv preprint arXiv:2208.02813 (2022)."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2013.6544847"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2749431"},{"key":"e_1_3_2_2_13_1","volume-title":"International Conference on Machine Learning. PMLR, 4057--4086","author":"Clark Aidan","year":"2022","unstructured":"Aidan Clark, Diego De Las Casas, Aurelia Guy, Arthur Mensch, Michela Paganini, Jordan Hoffmann, Bogdan Damoc, Blake Hechtman, Trevor Cai, Sebastian Borgeaud, et al. 2022. Unified scaling laws for routed language models. In International Conference on Machine Learning. PMLR, 4057--4086."},{"key":"e_1_3_2_2_14_1","unstructured":"Gao Cong Wenfei Fan Floris Geerts Xibei Jia and Shuai Ma. 2007. Improving Data Quality: Consistency and Accuracy. In VLDB. 315--326."},{"key":"e_1_3_2_2_15_1","volume-title":"Yi Chen, and Subbarao Kambhampati.","author":"De Sushovan","year":"2015","unstructured":"Sushovan De, Yuheng Hu, Venkata Vamsikrishna Meduri, Yi Chen, and Subbarao Kambhampati. 2015. BayesWipe: A Scalable Probabilistic Framework for Cleaning BigData. CoRR, Vol. abs\/1506.08908 (2015)."},{"key":"e_1_3_2_2_16_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT. 4171--4186.","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT. 4171--4186."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.583"},{"key":"e_1_3_2_2_18_1","volume-title":"Leveraging currency for repairing inconsistent and incomplete data. TKDE","author":"Ding Xiaoou","year":"2020","unstructured":"Xiaoou Ding, Hongzhi Wang, Jiaxuan Su, Muxian Wang, Jianzhong Li, and Hong Gao. 2020. Leveraging currency for repairing inconsistent and incomplete data. TKDE (2020)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2003.809034"},{"key":"e_1_3_2_2_20_1","volume-title":"LORAMOE: REVOLUTIONIZING MIXTURE OF EX-PERTS FOR MAINTAINING WORLD KNOWLEDGE IN LANGUAGE MODEL ALIGNMENT. arXiv preprint arXiv:2312.09979","author":"Dou Shihan","year":"2023","unstructured":"Shihan Dou, Enyu Zhou, Yan Liu, Songyang Gao, Jun Zhao, Wei Shen, Yuhao Zhou, Zhiheng Xi, Xiao Wang, Xiaoran Fan, et al. 2023. LORAMOE: REVOLUTIONIZING MIXTURE OF EX-PERTS FOR MAINTAINING WORLD KNOWLEDGE IN LANGUAGE MODEL ALIGNMENT. arXiv preprint arXiv:2312.09979 (2023)."},{"key":"e_1_3_2_2_21_1","volume-title":"International Conference on Machine Learning. PMLR, 5547--5569","author":"Du Nan","year":"2022","unstructured":"Nan Du, Yanping Huang, Andrew M Dai, Simon Tong, Dmitry Lepikhin, Yuanzhong Xu, Maxim Krikun, Yanqi Zhou, Adams Wei Yu, Orhan Firat, et al. 2022. Glam: Efficient scaling of language models with mixture-of-experts. In International Conference on Machine Learning. PMLR, 5547--5569."},{"key":"e_1_3_2_2_22_1","volume-title":"Fixing moe over-fitting on low-resource languages in multilingual machine translation. arXiv preprint arXiv:2212.07571","author":"Elbayad Maha","year":"2022","unstructured":"Maha Elbayad, Anna Sun, and Shruti Bhosale. 2022. Fixing moe over-fitting on low-resource languages in multilingual machine translation. arXiv preprint arXiv:2212.07571 (2022)."},{"key":"e_1_3_2_2_23_1","volume-title":"Tabel: Entity linking in web tables. In ISWC.","author":"Bhagavatula","year":"2015","unstructured":"Bhagavatula et al. 2015. Tabel: Entity linking in web tables. In ISWC."},{"key":"e_1_3_2_2_24_1","unstructured":"Brown et al. 2020. Language models are few-shot learners. NIPS (2020)."},{"key":"e_1_3_2_2_25_1","unstructured":"Dong et al. 2022. A survey for in-context learning. arXiv preprint (2022)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542700.3542709"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"crossref","unstructured":"Karpukhin et al. 2020. Dense passage retrieval for open-domain question answering. arXiv preprint (2020).","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_2_28_1","unstructured":"Lewis et al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. NIPS (2020)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457258"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"crossref","unstructured":"Suhara et al. 2022. Annotating columns with pre-trained language models. In SIGMOD.","DOI":"10.1145\/3514221.3517906"},{"key":"e_1_3_2_2_31_1","volume-title":"RECA: Related Tables Enhanced Column Semantic Type Annotation Framework. VLDB","author":"Sun","year":"2023","unstructured":"Sun et al. 2023. RECA: Related Tables Enhanced Column Semantic Type Annotation Framework. VLDB (2023)."},{"key":"e_1_3_2_2_32_1","volume-title":"2022 d. Emergent abilities of large language models. arXiv preprint","author":"Wei","year":"2022","unstructured":"Wei et al. 2022 d. Emergent abilities of large language models. arXiv preprint (2022)."},{"key":"e_1_3_2_2_33_1","unstructured":"Xiao et al. 2023. C-Pack: Packaged Resources To Advance General Chinese Embedding."},{"key":"e_1_3_2_2_34_1","unstructured":"Zhao et al. 2023. A survey of large language models. arXiv preprint (2023)."},{"key":"e_1_3_2_2_35_1","first-page":"1","article-title":"Making It Tractable to Catch Duplicates and Conflicts in Graphs","volume":"1","author":"Fan Wenfei","year":"2023","unstructured":"Wenfei Fan, Wenzhi Fu, Ruochun Jin, Muyang Liu, Ping Lu, and Chao Tian. 2023. Making It Tractable to Catch Duplicates and Conflicts in Graphs. Proceedings of the ACM on Management of Data, Vol. 1, 1 (2023), 1--28.","journal-title":"Proceedings of the ACM on Management of Data"},{"key":"e_1_3_2_2_36_1","first-page":"1","article-title":"Splitting Tuples of Mismatched Entities","volume":"1","author":"Fan Wenfei","year":"2023","unstructured":"Wenfei Fan, Ziyan Han, Weilong Ren, Ding Wang, Yaoshu Wang, Min Xie, and Mengyi Yan. 2023. Splitting Tuples of Mismatched Entities. Proceedings of the ACM on Management of Data, Vol. 1, 4 (2023), 1--29.","journal-title":"Proceedings of the ACM on Management of Data"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.14778\/1687627.1687674"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-011-0253-7"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-020-2917-1"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.5555\/3586589.3586709"},{"key":"e_1_3_2_2_41_1","volume-title":"Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. 2022. Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323 (2022)."},{"key":"e_1_3_2_2_42_1","unstructured":"Leo Gao Stella Biderman Sid Black Laurence Golding Travis Hoppe Charles Foster Jason Phang Horace He Anish Thite Noa Nabeshima et al. 2020. The pile: An 800gb dataset of diverse text for language modeling. arXiv preprint arXiv:2101.00027 (2020)."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1186\/s41044-016-0014-0"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.14778\/2536360.2536363"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"crossref","unstructured":"Amir Gilad Daniel Deutch and Sudeepa Roy. 2020. On multiple semantics for declarative database repairs. In SIGMOD. 817--831.","DOI":"10.1145\/3318464.3389721"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2588555.2588576"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920897"},{"key":"e_1_3_2_2_48_1","unstructured":"Jiawei Han Jian Pei and Hanghang Tong. 2022. Data mining: concepts and techniques. Morgan kaufmann."},{"key":"e_1_3_2_2_49_1","volume-title":"Sai Qian Zhang, et al","author":"Han Zeyu","year":"2024","unstructured":"Zeyu Han, Chao Gao, Jinyang Liu, Sai Qian Zhang, et al. 2024. Parameter-efficient fine-tuning for large models: A comprehensive survey. arXiv preprint arXiv:2403.14608 (2024)."},{"key":"e_1_3_2_2_50_1","first-page":"29335","article-title":"Dselect-k: Differentiable selection in the mixture of experts with applications to multi-task learning","volume":"34","author":"Hazimeh Hussein","year":"2021","unstructured":"Hussein Hazimeh, Zhe Zhao, Aakanksha Chowdhery, Maheswaran Sathiamoorthy, Yihua Chen, Rahul Mazumder, Lichan Hong, and Ed Chi. 2021. Dselect-k: Differentiable selection in the mixture of experts with applications to multi-task learning. Advances in Neural Information Processing Systems, Vol. 34 (2021), 29335--29347.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_51_1","volume-title":"Mera: Merging pretrained adapters for few-shot learning. arXiv preprint arXiv:2308.15982","author":"He Shwai","year":"2023","unstructured":"Shwai He, Run-Ze Fan, Liang Ding, Li Shen, Tianyi Zhou, and Dacheng Tao. 2023. Mera: Merging pretrained adapters for few-shot learning. arXiv preprint arXiv:2308.15982 (2023)."},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"crossref","unstructured":"Alireza Heidari Joshua McGrath Ihab F. Ilyas and Theodoros Rekatsinas. 2019. HoloDetect: Few-Shot Learning for Error Detection. In SIGMOD. 829--846.","DOI":"10.1145\/3299869.3319888"},{"key":"e_1_3_2_2_53_1","volume-title":"In-context learning creates task vectors. arXiv preprint arXiv:2310.15916","author":"Hendel Roee","year":"2023","unstructured":"Roee Hendel, Mor Geva, and Amir Globerson. 2023. In-context learning creates task vectors. arXiv preprint arXiv:2310.15916 (2023)."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"crossref","unstructured":"Benjamin Hilprecht and Carsten Binnig. 2021. ReStore - Neural Data Completion for Relational Databases. In SIGMOD. 710--722.","DOI":"10.1145\/3448016.3457264"},{"key":"e_1_3_2_2_55_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_2_56_1","volume-title":"Tianyu Pang, Chao Du, and Min Lin.","author":"Huang Chengsong","year":"2023","unstructured":"Chengsong Huang, Qian Liu, Bill Yuchen Lin, Tianyu Pang, Chao Du, and Min Lin. 2023. Lorahub: Efficient cross-task generalization via dynamic lora composition. arXiv preprint arXiv:2307.13269 (2023)."},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3196889"},{"key":"e_1_3_2_2_58_1","volume-title":"Tabbie: Pretrained representations of tabular data. arXiv preprint arXiv:2105.02584","author":"Iida Hiroshi","year":"2021","unstructured":"Hiroshi Iida, Dung Thai, Varun Manjunatha, and Mohit Iyyer. 2021. Tabbie: Pretrained representations of tabular data. arXiv preprint arXiv:2105.02584 (2021)."},{"key":"e_1_3_2_2_59_1","volume-title":"Adaptive mixtures of local experts. Neural computation","author":"Jacobs Robert A","year":"1991","unstructured":"Robert A Jacobs, Michael I Jordan, Steven J Nowlan, and Geoffrey E Hinton. 1991. Adaptive mixtures of local experts. Neural computation, Vol. 3, 1 (1991), 79--87."},{"key":"e_1_3_2_2_60_1","volume-title":"Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al.","author":"Jiang Albert Q","year":"2023","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_2_61_1","volume-title":"Diego de las Casas, Emma Bou Hanna, Florian Bressand, et al.","author":"Jiang Albert Q","year":"2024","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Antoine Roux, Arthur Mensch, Blanche Savary, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Emma Bou Hanna, Florian Bressand, et al. 2024. Mixtral of Experts. arXiv preprint arXiv:2401.04088 (2024)."},{"key":"e_1_3_2_2_62_1","volume-title":"Scaling laws for neural language models. arXiv preprint arXiv:2001.08361","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, Tom Henighan, Tom B Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, and Dario Amodei. 2020. Scaling laws for neural language models. arXiv preprint arXiv:2001.08361 (2020)."},{"key":"e_1_3_2_2_63_1","volume-title":"How Does Information Bottleneck Help Deep Learning? arXiv preprint arXiv:2305.18887","author":"Kawaguchi Kenji","year":"2023","unstructured":"Kenji Kawaguchi, Zhun Deng, Xu Ji, and Jiaoyang Huang. 2023. How Does Information Bottleneck Help Deep Learning? arXiv preprint arXiv:2305.18887 (2023)."},{"key":"e_1_3_2_2_64_1","volume-title":"Basil Mustafa, Joshua Ainslie, Yi Tay, Mostafa Dehghani, and Neil Houlsby.","author":"Komatsuzaki Aran","year":"2022","unstructured":"Aran Komatsuzaki, Joan Puigcerver, James Lee-Thorp, Carlos Riquelme Ruiz, Basil Mustafa, Joshua Ainslie, Yi Tay, Mostafa Dehghani, and Neil Houlsby. 2022. Sparse upcycling: Training mixture-of-experts from dense checkpoints. arXiv preprint arXiv:2212.05055 (2022)."},{"key":"e_1_3_2_2_65_1","volume-title":"Column Type Annotation using ChatGPT. arXiv preprint arXiv:2306.00745","author":"Korini Keti","year":"2023","unstructured":"Keti Korini and Christian Bizer. 2023. Column Type Annotation using ChatGPT. arXiv preprint arXiv:2306.00745 (2023)."},{"key":"e_1_3_2_2_66_1","volume-title":"Beyond distillation: Task-level mixture-of-experts for efficient inference. arXiv preprint arXiv:2110.03742","author":"Kudugunta Sneha","year":"2021","unstructured":"Sneha Kudugunta, Yanping Huang, Ankur Bapna, Maxim Krikun, Dmitry Lepikhin, Minh-Thang Luong, and Orhan Firat. 2021. Beyond distillation: Task-level mixture-of-experts for efficient inference. arXiv preprint arXiv:2110.03742 (2021)."},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_2_68_1","volume-title":"Gshard: Scaling giant models with conditional computation and automatic sharding. arXiv preprint arXiv:2006.16668","author":"Lepikhin Dmitry","year":"2020","unstructured":"Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2020. Gshard: Scaling giant models with conditional computation and automatic sharding. arXiv preprint arXiv:2006.16668 (2020)."},{"key":"e_1_3_2_2_69_1","volume-title":"Mansinghka","author":"Lew Alexander K.","year":"2020","unstructured":"Alexander K. Lew, Monica Agrawal, David A. Sontag, and Vikash K. Mansinghka. 2020. PClean: Bayesian Data Cleaning at Scale with Domain-Specific Probabilistic Programming. CoRR, Vol. abs\/2007.11838 (2020)."},{"key":"e_1_3_2_2_70_1","volume-title":"International Conference on Machine Learning. PMLR, 6265--6274","author":"Lewis Mike","year":"2021","unstructured":"Mike Lewis, Shruti Bhosale, Tim Dettmers, Naman Goyal, and Luke Zettlemoyer. 2021. Base layers: Simplifying training of large, sparse models. In International Conference on Machine Learning. PMLR, 6265--6274."},{"key":"e_1_3_2_2_71_1","volume-title":"Muhammad Asif Ali, and Yi Wang","author":"Li Bing","year":"2020","unstructured":"Bing Li, Wei Wang, Yifang Sun, Linhan Zhang, Muhammad Asif Ali, and Yi Wang. 2020. GraphER: Token-Centric Entity Resolution with Graph Convolutional Neural Networks.. In AAAI. 8172--8179."},{"key":"e_1_3_2_2_72_1","volume-title":"Dongmei Zhang, and Surajit Chaudhuri.","author":"Li Peng","year":"2023","unstructured":"Peng Li, Yeye He, Dror Yashar, Weiwei Cui, Song Ge, Haidong Zhang, Danielle Rifinski Fainman, Dongmei Zhang, and Surajit Chaudhuri. 2023. Table-gpt: Table-tuned gpt for diverse table tasks. arXiv preprint arXiv:2310.09263 (2023)."},{"key":"e_1_3_2_2_73_1","volume-title":"Deep model fusion: A survey. arXiv preprint arXiv:2309.15698","author":"Li Weishi","year":"2023","unstructured":"Weishi Li, Yong Peng, Miao Zhang, Liang Ding, Han Hu, and Li Shen. 2023. Deep model fusion: A survey. arXiv preprint arXiv:2309.15698 (2023)."},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"publisher","DOI":"10.14778\/3421424.3421431"},{"key":"e_1_3_2_2_75_1","volume-title":"Deep entity matching with pre-trained language models. arXiv preprint arXiv:2004.00584","author":"Li Yuliang","year":"2020","unstructured":"Yuliang Li, Jinfeng Li, Yoshihiko Suhara, AnHai Doan, and Wang-Chiew Tan. 2020. Deep entity matching with pre-trained language models. arXiv preprint arXiv:2004.00584 (2020)."},{"key":"e_1_3_2_2_76_1","volume-title":"Few-shot Adaptation of Multi-modal Foundation Models: A Survey. arXiv preprint arXiv:2401.01736","author":"Liu Fan","year":"2024","unstructured":"Fan Liu, Tianshu Zhang, Wenwen Dai, Wenwen Cai, Xiaocong Zhou, and Delong Chen. 2024. Few-shot Adaptation of Multi-modal Foundation Models: A Survey. arXiv preprint arXiv:2401.01736 (2024)."},{"key":"e_1_3_2_2_77_1","volume-title":"Moelora: An moe-based parameter efficient fine-tuning method for multi-task medical applications. arXiv preprint arXiv:2310.18339","author":"Liu Qidong","year":"2023","unstructured":"Qidong Liu, Xian Wu, Xiangyu Zhao, Yuanshao Zhu, Derong Xu, Feng Tian, and Yefeng Zheng. 2023. Moelora: An moe-based parameter efficient fine-tuning method for multi-task medical applications. arXiv preprint arXiv:2310.18339 (2023)."},{"key":"e_1_3_2_2_78_1","volume-title":"Picket: Self-supervised Data Diagnostics for ML Pipelines. CoRR","author":"Liu Zifan","year":"2020","unstructured":"Zifan Liu, Zhechun Zhou, and Theodoros Rekatsinas. 2020. Picket: Self-supervised Data Diagnostics for ML Pipelines. CoRR, Vol. abs\/2006.04730 (2020)."},{"key":"e_1_3_2_2_79_1","volume-title":"Cross-token modeling with conditional computation. arXiv preprint arXiv:2109.02008","author":"Lou Yuxuan","year":"2021","unstructured":"Yuxuan Lou, Fuzhao Xue, Zangwei Zheng, and Yang You. 2021. Cross-token modeling with conditional computation. arXiv preprint arXiv:2109.02008 (2021)."},{"key":"e_1_3_2_2_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220007"},{"key":"e_1_3_2_2_81_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407801"},{"key":"e_1_3_2_2_82_1","volume-title":"Samuel Madden, Mourad Ouzzani, Michael Stonebraker, and Nan Tang.","author":"Mahdavi Mohammad","year":"2019","unstructured":"Mohammad Mahdavi, Ziawasch Abedjan, Raul Castro Fernandez, Samuel Madden, Mourad Ouzzani, Michael Stonebraker, and Nan Tang. 2019. Raha: A Configuration-Free Error Detection System. In SIGMOD. 865--882."},{"key":"e_1_3_2_2_83_1","volume-title":"PEFT: State-of-the-art Parameter-Efficient Fine-Tuning methods. https:\/\/github.com\/huggingface\/peft.","author":"Mangrulkar Sourab","year":"2022","unstructured":"Sourab Mangrulkar, Sylvain Gugger, Lysandre Debut, Younes Belkada, Sayak Paul, and Benjamin Bossan. 2022. PEFT: State-of-the-art Parameter-Efficient Fine-Tuning methods. https:\/\/github.com\/huggingface\/peft."},{"key":"e_1_3_2_2_84_1","volume-title":"Inverse Scaling: When Bigger Isn't Better. arXiv preprint arXiv:2306.09479","author":"McKenzie Ian R","year":"2023","unstructured":"Ian R McKenzie, Alexander Lyzhov, Michael Pieler, Alicia Parrish, Aaron Mueller, Ameya Prabhu, Euan McLean, Aaron Kirtland, Alexis Ross, Alisa Liu, et al. 2023. Inverse Scaling: When Bigger Isn't Better. arXiv preprint arXiv:2306.09479 (2023)."},{"key":"e_1_3_2_2_85_1","volume-title":"Capturing Semantics for Imputation with Pre-trained Language Models","author":"Mei Yinan","unstructured":"Yinan Mei, Shaoxu Song, Chenguang Fang, Haifeng Yang, Jingyun Fang, and Jiang Long. 2021. Capturing Semantics for Imputation with Pre-trained Language Models. In ICDE. IEEE, 61--72."},{"key":"e_1_3_2_2_86_1","doi-asserted-by":"crossref","unstructured":"Sidharth Mudgal Han Li Theodoros Rekatsinas AnHai Doan Youngchoon Park Ganesh Krishnan Rohit Deep Esteban Arcaute and Vijay Raghavendra. 2018. Deep Learning for Entity Matching: A Design Space Exploration. In SIGMOD. 19--34.","DOI":"10.1145\/3183713.3196926"},{"key":"e_1_3_2_2_87_1","first-page":"9564","article-title":"Multimodal contrastive learning with limoe: the language-image mixture of experts","volume":"35","author":"Mustafa Basil","year":"2022","unstructured":"Basil Mustafa, Carlos Riquelme, Joan Puigcerver, Rodolphe Jenatton, and Neil Houlsby. 2022. Multimodal contrastive learning with limoe: the language-image mixture of experts. Advances in Neural Information Processing Systems, Vol. 35 (2022), 9564--9576.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_88_1","volume-title":"International Conference on Machine Learning. PMLR, 7130--7140","author":"Muzellec Boris","year":"2020","unstructured":"Boris Muzellec, Julie Josse, Claire Boyer, and Marco Cuturi. 2020. Missing data imputation using optimal transport. In International Conference on Machine Learning. PMLR, 7130--7140."},{"key":"e_1_3_2_2_89_1","volume-title":"Can foundation models wrangle your data? arXiv preprint arXiv:2205.09911","author":"Narayan Avanika","year":"2022","unstructured":"Avanika Narayan, Ines Chami, Laurel Orr, Simran Arora, and Christopher R\u00e9. 2022. Can foundation models wrangle your data? arXiv preprint arXiv:2205.09911 (2022)."},{"key":"e_1_3_2_2_90_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107501"},{"key":"e_1_3_2_2_91_1","volume-title":"Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748","author":"van den Oord Aaron","year":"2018","unstructured":"Aaron van den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)."},{"key":"e_1_3_2_2_92_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377455"},{"key":"e_1_3_2_2_93_1","volume-title":"Using ChatGPT for Entity Matching. arXiv preprint arXiv:2305.03423","author":"Peeters Ralph","year":"2023","unstructured":"Ralph Peeters and Christian Bizer. 2023. Using ChatGPT for Entity Matching. arXiv preprint arXiv:2305.03423 (2023)."},{"key":"e_1_3_2_2_94_1","doi-asserted-by":"publisher","DOI":"10.14778\/3570690.3570694"},{"key":"e_1_3_2_2_95_1","volume-title":"Adapterfusion: Non-destructive task composition for transfer learning. arXiv preprint arXiv:2005.00247","author":"Pfeiffer Jonas","year":"2020","unstructured":"Jonas Pfeiffer, Aishwarya Kamath, Andreas R\u00fcckl\u00e9, Kyunghyun Cho, and Iryna Gurevych. 2020. Adapterfusion: Non-destructive task composition for transfer learning. arXiv preprint arXiv:2005.00247 (2020)."},{"key":"e_1_3_2_2_96_1","unstructured":"Yujia Qin Xiaozhi Wang Yusheng Su Yankai Lin Ning Ding Jing Yi Weize Chen Zhiyuan Liu Juanzi Li Lei Hou et al. 2021. Exploring Universal Intrinsic Task Subspace via Prompt Tuning. arXiv preprint arXiv:2110.07867 (2021)."},{"key":"e_1_3_2_2_97_1","doi-asserted-by":"crossref","unstructured":"Nils Reimers and Iryna Gurevych. 2019. Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks.","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_2_98_1","volume-title":"Holoclean: Holistic data repairs with probabilistic inference. arXiv preprint arXiv:1702.00820","author":"Rekatsinas Theodoros","year":"2017","unstructured":"Theodoros Rekatsinas, Xu Chu, Ihab F Ilyas, and Christopher R\u00e9. 2017. Holoclean: Holistic data repairs with probabilistic inference. arXiv preprint arXiv:1702.00820 (2017)."},{"key":"e_1_3_2_2_99_1","first-page":"17555","article-title":"Hash layers for large sparse models","volume":"34","author":"Roller Stephen","year":"2021","unstructured":"Stephen Roller, Sainbayar Sukhbaatar, Jason Weston, et al. 2021. Hash layers for large sparse models. NIPS, Vol. 34 (2021), 17555--17566.","journal-title":"NIPS"},{"key":"e_1_3_2_2_100_1","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2021. High-Resolution Image Synthesis with Latent Diffusion Models. arxiv: 2112.10752 [cs.CV]"},{"key":"e_1_3_2_2_101_1","volume-title":"An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098","author":"Ruder Sebastian","year":"2017","unstructured":"Sebastian Ruder. 2017. An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098 (2017)."},{"key":"e_1_3_2_2_102_1","volume-title":"Barret Zoph, William Fedus, Xinyun Chen, et al.","author":"Shen Sheng","year":"2023","unstructured":"Sheng Shen, Le Hou, Yanqi Zhou, Nan Du, Shayne Longpre, Jason Wei, Hyung Won Chung, Barret Zoph, William Fedus, Xinyun Chen, et al. 2023. Mixture-of-experts meets instruction tuning: A winning combination for large language models. arXiv preprint arXiv:2305.14705 (2023)."},{"key":"e_1_3_2_2_103_1","first-page":"275","article-title":"Enriching data imputation under similarity rule constraints","volume":"32","author":"Song Shaoxu","year":"2018","unstructured":"Shaoxu Song, Yu Sun, Aoqian Zhang, Lei Chen, and Jianmin Wang. 2018. Enriching data imputation under similarity rule constraints. TKDE, Vol. 32, 2 (2018), 275--287.","journal-title":"TKDE"},{"key":"e_1_3_2_2_104_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2020.06.005"},{"key":"e_1_3_2_2_105_1","first-page":"15920","article-title":"Adversarial graph augmentation to improve graph contrastive learning","volume":"34","author":"Suresh Susheel","year":"2021","unstructured":"Susheel Suresh, Pan Li, Cong Hao, and Jennifer Neville. 2021. Adversarial graph augmentation to improve graph contrastive learning. Advances in Neural Information Processing Systems, Vol. 34 (2021), 15920--15933.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_106_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476294"},{"key":"e_1_3_2_2_107_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86362-3_19"},{"key":"e_1_3_2_2_108_1","volume-title":"The information bottleneck method. arXiv preprint physics\/0004057","author":"Tishby Naftali","year":"2000","unstructured":"Naftali Tishby, Fernando C Pereira, and William Bialek. 2000. The information bottleneck method. arXiv preprint physics\/0004057 (2000)."},{"key":"e_1_3_2_2_109_1","volume-title":"Deep learning and the information bottleneck principle. In 2015 ieee information theory workshop (itw)","author":"Tishby Naftali","unstructured":"Naftali Tishby and Noga Zaslavsky. 2015. Deep learning and the information bottleneck principle. In 2015 ieee information theory workshop (itw). IEEE, 1--5."},{"key":"e_1_3_2_2_110_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_2_111_1","volume-title":"PromptEM: prompt-tuning for low-resource generalized entity matching. arXiv preprint arXiv:2207.04802","author":"Wang Pengfei","year":"2022","unstructured":"Pengfei Wang, Xiaocan Zeng, Lu Chen, Fan Ye, Yuren Mao, Junhao Zhu, and Yunjun Gao. 2022. PromptEM: prompt-tuning for low-resource generalized entity matching. arXiv preprint arXiv:2207.04802 (2022)."},{"key":"e_1_3_2_2_112_1","volume-title":"Ahmed Hassan Awadallah, and Jianfeng Gao","author":"Wang Yaqing","year":"2022","unstructured":"Yaqing Wang, Sahaj Agarwal, Subhabrata Mukherjee, Xiaodong Liu, Jing Gao, Ahmed Hassan Awadallah, and Jianfeng Gao. 2022. Adamix: Mixture-of-adaptations for parameter-efficient model tuning. arXiv preprint arXiv:2205.12410 (2022)."},{"key":"e_1_3_2_2_113_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-013-0308-z"},{"key":"e_1_3_2_2_114_1","volume-title":"Parameter-Efficient Sparsity Crafting from Dense to Mixture-of-Experts for Instruction Tuning on General Tasks. arXiv preprint arXiv:2401.02731","author":"Wu Haoyuan","year":"2024","unstructured":"Haoyuan Wu, Haisheng Zheng, and Bei Yu. 2024. Parameter-Efficient Sparsity Crafting from Dense to Mixture-of-Experts for Instruction Tuning on General Tasks. arXiv preprint arXiv:2401.02731 (2024)."},{"key":"e_1_3_2_2_115_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-42941-5_21"},{"key":"e_1_3_2_2_116_1","first-page":"307","article-title":"Attention-based learning for missing data imputation in HoloClean","volume":"2","author":"Wu Richard","year":"2020","unstructured":"Richard Wu, Aoqian Zhang, Ihab Ilyas, and Theodoros Rekatsinas. 2020. Attention-based learning for missing data imputation in HoloClean. Proceedings of Machine Learning and Systems, Vol. 2 (2020), 307--325.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_2_117_1","volume-title":"MLSys","author":"Wu Richard","year":"2020","unstructured":"Richard Wu, Aoqian Zhang, Ihab F. Ilyas, and Theodoros Rekatsinas. 2020. Attention-based Learning for Missing Data Imputation in HoloClean. In MLSys 2020."},{"key":"e_1_3_2_2_118_1","volume-title":"Elmagarmid","author":"Yakout Mohamed","year":"2013","unstructured":"Mohamed Yakout, Laure Berti-\u00c9quille, and Ahmed K. Elmagarmid. 2013. Don't Be SCAREd: Use SCalable Automatic REpairing with Maximal Likelihood and Bounded Changes. In SIGMOD. ACM."},{"key":"e_1_3_2_2_119_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498377"},{"key":"e_1_3_2_2_120_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-emnlp.189"},{"key":"e_1_3_2_2_121_1","volume-title":"International conference on machine learning. PMLR, 5689--5698","author":"Yoon Jinsung","year":"2018","unstructured":"Jinsung Yoon, James Jordon, and Mihaela Schaar. 2018. Gain: Missing data imputation using generative adversarial nets. In International conference on machine learning. PMLR, 5689--5698."},{"key":"e_1_3_2_2_122_1","volume-title":"GAIN: Missing Data Imputation using Generative Adversarial Nets. In ICML. PMLR, 5675--5684.","author":"Yoon Jinsung","year":"2018","unstructured":"Jinsung Yoon, James Jordon, and Mihaela van der Schaar. 2018. GAIN: Missing Data Imputation using Generative Adversarial Nets. In ICML. PMLR, 5675--5684."},{"key":"e_1_3_2_2_123_1","volume-title":"Kola: Carefully benchmarking world knowledge of large language models. arXiv preprint arXiv:2306.09296","author":"Yu Jifan","year":"2023","unstructured":"Jifan Yu, Xiaozhi Wang, Shangqing Tu, Shulin Cao, Daniel Zhang-Li, Xin Lv, Hao Peng, Zijun Yao, Xiaohan Zhang, Hanming Li, et al. 2023. Kola: Carefully benchmarking world knowledge of large language models. arXiv preprint arXiv:2306.09296 (2023)."},{"key":"e_1_3_2_2_124_1","volume-title":"Pushing mixture of experts to the limit: Extremely parameter efficient moe for instruction tuning. arXiv preprint arXiv:2309.05444","author":"Zadouri Ted","year":"2023","unstructured":"Ted Zadouri, Ahmet \u00dcst\u00fcn, Arash Ahmadian, Beyza Ermics, Acyr Locatelli, and Sara Hooker. 2023. Pushing mixture of experts to the limit: Extremely parameter efficient moe for instruction tuning. arXiv preprint arXiv:2309.05444 (2023)."},{"key":"e_1_3_2_2_125_1","volume-title":"Jellyfish: A Large Language Model for Data Preprocessing. arXiv preprint arXiv:2312.01678","author":"Zhang Haochen","year":"2023","unstructured":"Haochen Zhang, Yuyang Dong, Chuan Xiao, and Masafumi Oyamada. 2023. Jellyfish: A Large Language Model for Data Preprocessing. arXiv preprint arXiv:2312.01678 (2023)."},{"key":"e_1_3_2_2_126_1","volume-title":"Large Language Models as Data Preprocessors. arXiv preprint arXiv:2308.16361","author":"Zhang Haochen","year":"2023","unstructured":"Haochen Zhang, Yuyang Dong, Chuan Xiao, and Masafumi Oyamada. 2023. Large Language Models as Data Preprocessors. arXiv preprint arXiv:2308.16361 (2023)."},{"key":"e_1_3_2_2_127_1","volume-title":"SMAT: An attention-based deep learning solution to the automation of schema matching","author":"Zhang Jing","year":"2021","unstructured":"Jing Zhang, Bonggun Shin, Jinho D Choi, and Joyce C Ho. 2021. SMAT: An attention-based deep learning solution to the automation of schema matching. In ADBIS. Springer, 260--274."},{"key":"e_1_3_2_2_128_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331385"},{"key":"e_1_3_2_2_129_1","doi-asserted-by":"crossref","unstructured":"Shijie Zhang Xin Yan Xuejiao Yang Binfeng Jia and Shuangyang Wang. 2023. Out of the Box Thinking: Improving Customer Lifetime Value Modelling via Expert Routing and Game Whale Detection. In CIKM. 3206--3215.","DOI":"10.1145\/3583780.3615002"},{"key":"e_1_3_2_2_130_1","volume-title":"2023 d. TableLlama: Towards Open Large Generalist Models for Tables. arXiv preprint arXiv:2311.09206","author":"Zhang Tianshu","year":"2023","unstructured":"Tianshu Zhang, Xiang Yue, Yifei Li, and Huan Sun. 2023 d. TableLlama: Towards Open Large Generalist Models for Tables. arXiv preprint arXiv:2311.09206 (2023)."},{"key":"e_1_3_2_2_131_1","doi-asserted-by":"crossref","unstructured":"Yi Zhang and Zachary G Ives. 2020. Finding related tables in data lakes for interactive data science. In SIGMOD. 1951--1966.","DOI":"10.1145\/3318464.3389726"},{"key":"e_1_3_2_2_132_1","doi-asserted-by":"crossref","unstructured":"Chen Zhao and Yeye He. 2019. Auto-EM: End-to-end Fuzzy Entity-Matching using Pre-trained Deep Models and Transfer Learning. In WWW. 2413--2424.","DOI":"10.1145\/3308558.3313578"},{"key":"e_1_3_2_2_133_1","volume-title":"Generalization Error Analysis for Sparse Mixture-of-Experts: A Preliminary Study. arXiv preprint arXiv:2403.17404","author":"Zhao Jinze","year":"2024","unstructured":"Jinze Zhao, Peihao Wang, and Zhangyang Wang. 2024. Generalization Error Analysis for Sparse Mixture-of-Experts: A Preliminary Study. arXiv preprint arXiv:2403.17404 (2024)."},{"key":"e_1_3_2_2_134_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Zheng Chujie","year":"2023","unstructured":"Chujie Zheng, Hao Zhou, Fandong Meng, Jie Zhou, and Minlie Huang. 2023. Large language models are not robust multiple choice selectors. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_2_135_1","volume-title":"LlamaFactory: Unified Efficient Fine-Tuning of 100 Language Models. arXiv preprint arXiv:2403.13372","author":"Zheng Yaowei","year":"2024","unstructured":"Yaowei Zheng, Richong Zhang, Junhao Zhang, Yanhan Ye, Zheyan Luo, and Yongqiang Ma. 2024. LlamaFactory: Unified Efficient Fine-Tuning of 100 Language Models. arXiv preprint arXiv:2403.13372 (2024). http:\/\/arxiv.org\/abs\/2403.13372"},{"key":"e_1_3_2_2_136_1","first-page":"7103","article-title":"Mixture-of-experts with expert choice routing","volume":"35","author":"Zhou Yanqi","year":"2022","unstructured":"Yanqi Zhou, Tao Lei, Hanxiao Liu, Nan Du, Yanping Huang, Vincent Zhao, Andrew M Dai, Quoc V Le, James Laudon, et al. 2022. Mixture-of-experts with expert choice routing. Advances in Neural Information Processing Systems, Vol. 35 (2022), 7103--7114.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_137_1","volume-title":"St-moe: Designing stable and transferable sparse expert models. arXiv preprint arXiv:2202.08906","author":"Zoph Barret","year":"2022","unstructured":"Barret Zoph, Irwan Bello, Sameer Kumar, Nan Du, Yanping Huang, Jeff Dean, Noam Shazeer, and William Fedus. 2022. St-moe: Designing stable and transferable sparse expert models. arXiv preprint arXiv:2202.08906 (2022)."},{"key":"e_1_3_2_2_138_1","volume-title":"Hany Hassan, Ruofei Zhang, Tuo Zhao, and Jianfeng Gao.","author":"Zuo Simiao","year":"2021","unstructured":"Simiao Zuo, Xiaodong Liu, Jian Jiao, Young Jin Kim, Hany Hassan, Ruofei Zhang, Tuo Zhao, and Jianfeng Gao. 2021. Taming sparsely activated transformer with stochastic experts. arXiv preprint arXiv:2110.04260 (2021)."}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Barcelona Spain","acronym":"KDD '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671873","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3671873","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:04:15Z","timestamp":1750291455000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671873"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":138,"alternative-id":["10.1145\/3637528.3671873","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3671873","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}