{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T22:08:49Z","timestamp":1766268529421,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T00:00:00Z","timestamp":1660435200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"European Union's Horizon 2020 MariCybERA-project","award":["952360"],"award-info":[{"award-number":["952360"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,14]]},"DOI":"10.1145\/3534678.3539064","type":"proceedings-article","created":{"date-parts":[[2022,8,12]],"date-time":"2022-08-12T19:06:41Z","timestamp":1660331201000},"page":"4010-4020","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Learning Supplementary NLP Features for CTR Prediction in Sponsored Search"],"prefix":"10.1145","author":[{"given":"Dong","family":"Wang","sequence":"first","affiliation":[{"name":"Microsoft Corporation, Beijing, China"}]},{"given":"Shaoguang","family":"Yan","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Beijing, China"}]},{"given":"Yunqing","family":"Xia","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Beijing, China"}]},{"given":"Kav\u00e9","family":"Salamatian","sequence":"additional","affiliation":[{"name":"University of Savoie &amp; Tallinn University of Technology, Annecy, France"}]},{"given":"Weiwei","family":"Deng","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Beijing, China"}]},{"given":"Qi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining. 535--541","author":"Cristian","year":"2006","unstructured":"Cristian Bucilu?, Rich Caruana, and Alexandru Niculescu-Mizil. 2006. Model compression. In Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining. 535--541."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_2_3_1","volume-title":"AUC optimization vs. error rate minimization. Advances in neural information processing systems","author":"Cortes Corinna","year":"2004","unstructured":"Corinna Cortes and Mehryar Mohri. 2004. AUC optimization vs. error rate minimization. Advances in neural information processing systems, Vol. 16, 16 (2004), 313--320."},{"key":"e_1_3_2_2_4_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330651"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178797"},{"key":"e_1_3_2_2_7_1","unstructured":"Huifeng Guo Ruiming Tang Yunming Ye Zhenguo Li and Xiuqiang He. 2017. DeepFM: a factorization-machine based neural network for CTR prediction. arXiv preprint arXiv:1703.04247 (2017)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412699"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2648584.2648589"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00201"},{"key":"e_1_3_2_2_12_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_2_13_1","volume-title":"Fine-tuned language models for text classification. arXiv preprint arXiv:1801.06146","author":"Howard Jeremy","year":"2018","unstructured":"Jeremy Howard and Sebastian Ruder. 2018. Fine-tuned language models for text classification. arXiv preprint arXiv:1801.06146 (2018), 194."},{"key":"e_1_3_2_2_14_1","volume-title":"Like what you like: Knowledge distill via neuron selectivity transfer. arXiv preprint arXiv:1707.01219","author":"Huang Zehao","year":"2017","unstructured":"Zehao Huang and Naiyan Wang. 2017. Like what you like: Knowledge distill via neuron selectivity transfer. arXiv preprint arXiv:1707.01219 (2017)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"crossref","unstructured":"Yunjiang Jiang Yue Shang Ziyang Liu Hongwei Shen Yun Xiao Wei Xiong Sulong Xu et al. 2020. BERT2DNN: BERT Distillation with Massive Unlabeled Data for Online E-Commerce Search. arXiv preprint arXiv:2010.10442 (2020).","DOI":"10.1109\/ICDM50108.2020.00030"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330858"},{"key":"e_1_3_2_2_17_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 7482--7491","author":"Kendall Alex","year":"2018","unstructured":"Alex Kendall, Yarin Gal, and Roberto Cipolla. 2018. Multi-task learning using uncertainty to weigh losses for scene geometry and semantics. In Proceedings of the IEEE conference on computer vision and pattern recognition. 7482--7491."},{"key":"e_1_3_2_2_18_1","volume-title":"Paraphrasing complex network: Network compression via factor transfer. arXiv preprint arXiv:1802.04977","author":"Kim Jangho","year":"2018","unstructured":"Jangho Kim, SeoungUK Park, and Nojun Kwak. 2018. Paraphrasing complex network: Network compression via factor transfer. arXiv preprint arXiv:1802.04977 (2018)."},{"key":"e_1_3_2_2_19_1","volume-title":"Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942","author":"Lan Zhenzhong","year":"2019","unstructured":"Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2019. Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942 (2019)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401243"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3041021.3054192"},{"key":"e_1_3_2_2_22_1","volume-title":"2019 a. Multi-task deep neural networks for natural language understanding. arXiv preprint arXiv:1901.11504","author":"Liu Xiaodong","year":"2019","unstructured":"Xiaodong Liu, Pengcheng He, Weizhu Chen, and Jianfeng Gao. 2019 a. Multi-task deep neural networks for natural language understanding. arXiv preprint arXiv:1901.11504 (2019)."},{"key":"e_1_3_2_2_23_1","volume-title":"2019 b. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and et al. 2019 b. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412747"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2488200"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331268"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_17"},{"key":"e_1_3_2_2_28_1","volume-title":"TinySearch--Semantics based Search Engine using Bert Embeddings. arXiv preprint arXiv:1908.02451","author":"Patel Manish","year":"2019","unstructured":"Manish Patel. 2019. TinySearch--Semantics based Search Engine using Bert Embeddings. arXiv preprint arXiv:1908.02451 (2019)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2010.127"},{"key":"e_1_3_2_2_30_1","volume-title":"Antoine Chassang, Carlo Gatta, and Yoshua Bengio.","author":"Romero Adriana","year":"2014","unstructured":"Adriana Romero, Nicolas Ballas, Samira Ebrahimi Kahou, Antoine Chassang, Carlo Gatta, and Yoshua Bengio. 2014. Fitnets: Hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854669"},{"key":"e_1_3_2_2_32_1","volume-title":"Ernie: Enhanced representation through knowledge integration. arXiv preprint arXiv:1904.09223","author":"Sun Yu","year":"2019","unstructured":"Yu Sun, Shuohuan Wang, Yukun Li, Shikun Feng, Xuyi Chen, Han Zhang, Xin Tian, Danxiang Zhu, Hao Tian, and Hua Wu. 2019. Ernie: Enhanced representation through knowledge integration. arXiv preprint arXiv:1904.09223 (2019)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2013.05.103"},{"key":"e_1_3_2_2_34_1","volume-title":"Studies","volume":"55","author":"Wang Zhe","year":"2020","unstructured":"Zhe Wang, Rundong Shi, Shijie Li, and Peng Yan. 2020. GBDT and BERT: a Hybrid Solution for Recognizing Citation Intent. Studies, Vol. 55 (2020), 12c2a39230188."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449830"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358045"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11601"}],"event":{"name":"KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Washington DC USA","acronym":"KDD '22"},"container-title":["Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539064","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3534678.3539064","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:09:50Z","timestamp":1750183790000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539064"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,14]]},"references-count":37,"alternative-id":["10.1145\/3534678.3539064","10.1145\/3534678"],"URL":"https:\/\/doi.org\/10.1145\/3534678.3539064","relation":{},"subject":[],"published":{"date-parts":[[2022,8,14]]},"assertion":[{"value":"2022-08-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}