{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:31:25Z","timestamp":1759883485514,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T00:00:00Z","timestamp":1746662400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,8]]},"DOI":"10.1145\/3701716.3715868","type":"proceedings-article","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T16:06:11Z","timestamp":1748016371000},"page":"1-4","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Algorithms for Leveraging LLMs for Generative and Predictive Recommender Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3482-0421","authenticated-orcid":false,"given":"Kayhan","family":"Behdin","sequence":"first","affiliation":[{"name":"LinkedIn, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2555-7518","authenticated-orcid":false,"given":"Yun","family":"Dai","sequence":"additional","affiliation":[{"name":"LinkedIn, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9422-818X","authenticated-orcid":false,"given":"Gregory","family":"Dexter","sequence":"additional","affiliation":[{"name":"LinkedIn, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6620-841X","authenticated-orcid":false,"given":"Aman","family":"Gupta","sequence":"additional","affiliation":[{"name":"LinkedIn, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1384-9743","authenticated-orcid":false,"given":"Rahul","family":"Mazumder","sequence":"additional","affiliation":[{"name":"LinkedIn, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4113-3614","authenticated-orcid":false,"given":"Ankan","family":"Saha","sequence":"additional","affiliation":[{"name":"LinkedIn, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4307-0725","authenticated-orcid":false,"given":"Qingquan","family":"Song","sequence":"additional","affiliation":[{"name":"LinkedIn, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9720-9905","authenticated-orcid":false,"given":"Shao","family":"Tang","sequence":"additional","affiliation":[{"name":"LinkedIn, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1999-2447","authenticated-orcid":false,"given":"Sirou","family":"Zhu","sequence":"additional","affiliation":[{"name":"LinkedIn, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5600-6239","authenticated-orcid":false,"given":"Pin-Lun","family":"Hsu","sequence":"additional","affiliation":[{"name":"LinkedIn, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,5,23]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"27","volume-title":"Ray et al., \"Training language models to follow instructions with human feedback,\" Advances in neural information processing systems","author":"Ouyang L.","year":"2022","unstructured":"L. Ouyang, J. Wu, X. Jiang, D. Almeida, C. Wainwright, P. Mishkin, C. Zhang, S. Agarwal, K. Slama, A. Ray et al., \"Training language models to follow instructions with human feedback,\" Advances in neural information processing systems, vol. 35, pp. 27 730--27 744, 2022."},{"key":"e_1_3_2_1_2_1","volume-title":"Direct preference optimization: Your language model is secretly a reward model,\" Advances in Neural Information Processing Systems","author":"Rafailov R.","year":"2024","unstructured":"R. Rafailov, A. Sharma, E. Mitchell, C. D. Manning, S. Ermon, and C. Finn, \"Direct preference optimization: Your language model is secretly a reward model,\" Advances in Neural Information Processing Systems, vol. 36, 2024."},{"key":"e_1_3_2_1_3_1","volume-title":"Reference-free monolithic preference optimization with odds ratio,\" arXiv preprint arXiv:2403.07691","author":"Hong J.","year":"2024","unstructured":"J. Hong, N. Lee, and J. Thorne, \"Reference-free monolithic preference optimization with odds ratio,\" arXiv preprint arXiv:2403.07691, 2024."},{"key":"e_1_3_2_1_4_1","volume-title":"Self-play preference optimization for language model alignment,\" arXiv preprint arXiv:2405.00675","author":"Wu Y.","year":"2024","unstructured":"Y.Wu, Z. Sun, H. Yuan, K. Ji, Y. Yang, and Q. Gu, \"Self-play preference optimization for language model alignment,\" arXiv preprint arXiv:2405.00675, 2024."},{"key":"e_1_3_2_1_5_1","volume-title":"Gptq: Accurate posttraining quantization for generative pre-trained transformers,\" arXiv preprint arXiv:2210.17323","author":"Frantar E.","year":"2022","unstructured":"E. Frantar, S. Ashkboos, T. Hoefler, and D. Alistarh, \"Gptq: Accurate posttraining quantization for generative pre-trained transformers,\" arXiv preprint arXiv:2210.17323, 2022."},{"key":"e_1_3_2_1_6_1","first-page":"87","volume":"6","author":"Lin J.","year":"2024","unstructured":"J. Lin, J. Tang, H. Tang, S. Yang, W.-M. Chen, W.-C. Wang, G. Xiao, X. Dang, C. Gan, and S. Han, \"Awq: Activation-aware weight quantization for on-device llm compression and acceleration,\" Proceedings of Machine Learning and Systems, vol. 6, pp. 87--100, 2024.","journal-title":"\"Awq: Activation-aware weight quantization for on-device llm compression and acceleration,\" Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_7_1","volume-title":"Quantease: Optimization-based quantization for language models-an efficient and intuitive algorithm,\" stat","author":"Behdin K.","year":"2023","unstructured":"K. Behdin, A. Acharya, S. K. Aman Gupta, and R. Mazumder, \"Quantease: Optimization-based quantization for language models-an efficient and intuitive algorithm,\" stat, vol. 1050, p. 5, 2023."},{"key":"e_1_3_2_1_8_1","volume-title":"Spinquant--llm quantization with learned rotations,\" arXiv preprint arXiv:2405.16406","author":"Liu Z.","year":"2024","unstructured":"Z. Liu, C. Zhao, I. Fedorov, B. Soran, D. Choudhary, R. Krishnamoorthi, V. Chandra, Y. Tian, and T. Blankevoort, \"Spinquant--llm quantization with learned rotations,\" arXiv preprint arXiv:2405.16406, 2024."},{"key":"e_1_3_2_1_9_1","volume-title":"Alps: Improved optimization for highly sparse one-shot pruning for large language models,\" arXiv preprint arXiv:2406.07831","author":"Meng X.","year":"2024","unstructured":"X. Meng, K. Behdin, H. Wang, and R. Mazumder, \"Alps: Improved optimization for highly sparse one-shot pruning for large language models,\" arXiv preprint arXiv:2406.07831, 2024."},{"key":"e_1_3_2_1_10_1","first-page":"10","volume-title":"PMLR","author":"Frantar E.","year":"2023","unstructured":"E. Frantar and D. Alistarh, \"Sparsegpt: Massive language models can be accurately pruned in one-shot,\" in International Conference on Machine Learning. PMLR, 2023, pp. 10 323--10 337."},{"key":"e_1_3_2_1_11_1","first-page":"2031","volume-title":"PMLR","author":"Benbaki R.","year":"2023","unstructured":"R. Benbaki, W. Chen, X. Meng, H. Hazimeh, N. Ponomareva, Z. Zhao, and R. Mazumder, \"Fast as chita: Neural network pruning with combinatorial optimization,\" in International Conference on Machine Learning. PMLR, 2023, pp. 2031--2049."},{"key":"e_1_3_2_1_12_1","volume-title":"Osscar: One-shot structured pruning in vision and language models with combinatorial optimization,\" arXiv preprint arXiv:2403.12983","author":"Meng X.","year":"2024","unstructured":"X. Meng, S. Ibrahim, K. Behdin, H. Hazimeh, N. Ponomareva, and R. Mazumder, \"Osscar: One-shot structured pruning in vision and language models with combinatorial optimization,\" arXiv preprint arXiv:2403.12983, 2024."},{"key":"e_1_3_2_1_13_1","volume-title":"Programming massively parallel processors: a hands-on approach. Morgan kaufmann","author":"Kirk D. B.","year":"2016","unstructured":"D. B. Kirk and W. H. Wen-Mei, Programming massively parallel processors: a hands-on approach. Morgan kaufmann, 2016."},{"key":"e_1_3_2_1_14_1","volume-title":"Liger kernel: Efficient triton kernels for llm training,\" arXiv e-prints","author":"Hsu P.-L.","year":"2024","unstructured":"P.-L. Hsu, Y. Dai, V. Kothapalli, Q. Song, S. Tang, S. Zhu, S. Shimizu, S. Sahni, H. Ning, and Y. Chen, \"Liger kernel: Efficient triton kernels for llm training,\" arXiv e-prints, pp. arXiv--2410, 2024."},{"key":"e_1_3_2_1_15_1","volume-title":"Sequence-level knowledge distillation,\" arXiv preprint arXiv:1606.07947","author":"Kim Y.","year":"2016","unstructured":"Y. Kim and A. M. Rush, \"Sequence-level knowledge distillation,\" arXiv preprint arXiv:1606.07947, 2016."},{"key":"e_1_3_2_1_16_1","volume-title":"Minillm: Knowledge distillation of large language models,\" in The Twelfth International Conference on Learning Representations","author":"Gu Y.","year":"2024","unstructured":"Y. Gu, L. Dong, F. Wei, and M. Huang, \"Minillm: Knowledge distillation of large language models,\" in The Twelfth International Conference on Learning Representations, 2024."},{"key":"e_1_3_2_1_17_1","volume-title":"On-policy distillation of language models: Learning from self-generated mistakes,\" in The Twelfth International Conference on Learning Representations","author":"Agarwal R.","year":"2024","unstructured":"R. Agarwal, N. Vieillard, Y. Zhou, P. Stanczyk, S. R. Garea, M. Geist, and O. Bachem, \"On-policy distillation of language models: Learning from self-generated mistakes,\" in The Twelfth International Conference on Learning Representations, 2024."},{"key":"e_1_3_2_1_18_1","volume-title":"Speculative knowledge distillation: Bridging the teacherstudent gap through interleaved sampling,\" arXiv preprint arXiv:2410.11325","author":"Xu W.","year":"2024","unstructured":"W. Xu, R. Han, Z. Wang, L. T. Le, D. Madeka, L. Li, W. Y. Wang, R. Agarwal, C.-Y. Lee, and T. Pfister, \"Speculative knowledge distillation: Bridging the teacherstudent gap through interleaved sampling,\" arXiv preprint arXiv:2410.11325, 2024."},{"key":"e_1_3_2_1_19_1","volume-title":"Alphapo -- reward shape matters for llm alignment","author":"Gupta A.","year":"2025","unstructured":"A. Gupta, S. Tang, Q. Song, S. Zhu, J. Hong, A. Saha, V. Gupta, N. Lee, E. Kim, J. Zhu, N. Pillai, and S. S. Keerthi, \"Alphapo -- reward shape matters for llm alignment,\" 2025. [Online]. Available: https:\/\/arxiv.org\/abs\/2501.03884"},{"key":"e_1_3_2_1_20_1","first-page":"10","volume-title":"Triton: an intermediate language and compiler for tiled neural network computations,\" in Proceedings of the 3rd ACM SIGPLAN International Workshop on Machine Learning and Programming Languages","author":"Tillet P.","year":"2019","unstructured":"P. Tillet, H.-T. Kung, and D. Cox, \"Triton: an intermediate language and compiler for tiled neural network computations,\" in Proceedings of the 3rd ACM SIGPLAN International Workshop on Machine Learning and Programming Languages, 2019, pp. 10--19."}],"event":{"name":"WWW '25: The ACM Web Conference 2025","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Sydney NSW Australia","acronym":"WWW '25"},"container-title":["Companion Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715868","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701716.3715868","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T17:44:10Z","timestamp":1759859050000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715868"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,8]]},"references-count":20,"alternative-id":["10.1145\/3701716.3715868","10.1145\/3701716"],"URL":"https:\/\/doi.org\/10.1145\/3701716.3715868","relation":{},"subject":[],"published":{"date-parts":[[2025,5,8]]},"assertion":[{"value":"2025-05-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}