{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T01:25:20Z","timestamp":1781054720672,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,10]],"date-time":"2022-06-10T00:00:00Z","timestamp":1654819200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,10]]},"DOI":"10.1145\/3514221.3517906","type":"proceedings-article","created":{"date-parts":[[2022,6,12]],"date-time":"2022-06-12T02:33:49Z","timestamp":1655001229000},"page":"1493-1503","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":68,"title":["Annotating Columns with Pre-trained Language Models"],"prefix":"10.1145","author":[{"given":"Yoshihiko","family":"Suhara","sequence":"first","affiliation":[{"name":"Megagon Labs, Mountain View, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jinfeng","family":"Li","sequence":"additional","affiliation":[{"name":"Megagon Labs, Mountain View, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuliang","family":"Li","sequence":"additional","affiliation":[{"name":"Megagon Labs, Mountain View, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Megagon Labs, Mountain View, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"\u00c7a\u011fatay","family":"Demiralp","sequence":"additional","affiliation":[{"name":"Sigma Computing, San Fransisco, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chen","family":"Chen","sequence":"additional","affiliation":[{"name":"Megagon Labs, Mountain View, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wang-Chiew","family":"Tan","sequence":"additional","affiliation":[{"name":"Meta AI, Menlo Park, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2022,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Kurt Bollacker Colin Evans Praveen Paritosh Tim Sturge and Jamie Taylor. 2008. Freebase: a collaboratively created graph database for structuring human knowledge. In SIGMOD. 1247--1250.","DOI":"10.1145\/1376616.1376746"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186029"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389742"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50012-5"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007379606734"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-019-00564-x"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.330129"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019"},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. BlackBoxNLP '19","author":"Clark Kevin","unstructured":"Kevin Clark, Urvashi Khandelwal, Omer Levy, and Christopher D. Manning. 2019. What Does BERT Look at? An Analysis of BERT's Attention. In Proc. BlackBoxNLP '19. 276--286."},{"key":"e_1_3_2_1_11_1","volume-title":"Ziawasch Abedjan, Sibo Wang, Michael Stonebraker, Ahmed K. Elmagarmid, Ihab F. Ilyas, Samuel Madden, Mourad Ouzzani, and Nan Tang.","author":"Deng Dong","year":"2017","unstructured":"Dong Deng, Raul Castro Fernandez, Ziawasch Abedjan, Sibo Wang, Michael Stonebraker, Ahmed K. Elmagarmid, Ihab F. Ilyas, Samuel Madden, Mourad Ouzzani, and Nan Tang. 2017. The Data Civilizer System. In 8th Biennial Conference on Innovative Data Systems Research, CIDR 2017, Chaminade, CA, USA, January 8--11, 2017, Online Proceedings. www.cidrdb.org. http:\/\/cidrdb.org\/cidr2017\/papers\/p44-deng-cidr17.pdf"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.14778\/3430915.3430921"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-155860869-6\/50060-3"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_16_1","volume-title":"https:\/\/gluebenchmark.com\/leaderboard","author":"Leaderboard GLUE.","year":"2021","unstructured":"GLUE. 2021. GLUE Leaderboard. https:\/\/gluebenchmark.com\/leaderboard (2021)."},{"key":"e_1_3_2_1_17_1","unstructured":"Google. [n.d.]. Google Data Studio. https:\/\/datastudio.google.com\/"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300892"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330993"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00324"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482295"},{"key":"e_1_3_2_1_24_1","volume-title":"Valentine: Evaluating Matching Techniques for Dataset Discovery. In 2021 IEEE 37th International Conference on Data Engineering (ICDE). IEEE, 468--479","author":"Koutras Christos","year":"2021","unstructured":"Christos Koutras, George Siachamis, Andra Ionescu, Kyriakos Psarakis, Jerry Brons, Marios Fragkoulis, Christoph Lofi, Angela Bonifati, and Asterios Katsifodimos. 2021. Valentine: Evaluating Matching Techniques for Dataset Discovery. In 2021 IEEE 37th International Conference on Data Engineering (ICDE). IEEE, 468--479."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.14778\/3421424.3421431"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1921005"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412164"},{"key":"e_1_3_2_1_28_1","volume-title":"1st International Conference on Learning Representations, ICLR","author":"Mikolov Tom\u00e1","year":"2013","unstructured":"Tom\u00e1 s Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient Estimation of Word Representations in Vector Space. In 1st International Conference on Learning Representations, ICLR 2013, Scottsdale, Arizona, USA, May 2--4, 2013, Workshop Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1301.3781"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.14778\/3229863.3240491"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2556195.2556266"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning. Association for Computational Linguistics","author":"Nakashole Ndapandula","year":"2012","unstructured":"Ndapandula Nakashole, Gerhard Weikum, and Fabian Suchanek. 2012. PATTY: A Taxonomy of Relational Patterns with Semantic Types. In Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning. Association for Computational Linguistics, Jeju Island, Korea, 1135--1145. https:\/\/www.aclweb.org\/anthology\/D12--1104"},{"key":"e_1_3_2_1_32_1","unstructured":"Phuc Nguyen Natthawut Kertkeidkachorn Ryutaro Ichise and Hideaki Takeda. [n.d.]. MTab4DBpedia: Semantic Annotation for Tabular Data with DBpedia. ( [n. d.])."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.14778\/3384345.3384346"},{"key":"e_1_3_2_1_34_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32, H. Wallach, H. Larochelle, A. Beygelzimer, F. dtextquotesingle Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.). Curran Associates, Inc., 8024--8035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/s007780100057"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_39_1","unstructured":"Sebastian Ruder. 2017. An Overview of Multi-Task Learning in Deep Neural Networks. arxiv: 1706.05098 [cs.LG]"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.14778\/3229863.3229867"},{"key":"e_1_3_2_1_41_1","unstructured":"Tableau Software. [n.d.]. Tableau. https:\/\/www.tableau.com\/"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Yoshihiko Suhara Jinfeng Li Yuliang Li Dan Zhang \u00c7a?atay Demiralp Chen Chen and Wang-Chiew Tan. 2021. Annotating Columns with Pre-trained Language Models. arxiv: 2104.01785 [cs.DB]","DOI":"10.1145\/3514221.3517906"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Yu Sun Shuohuan Wang Yu-Kun Li Shikun Feng Hao Tian Hua Wu and Haifeng Wang. 2020. ERNIE 2.0: A Continual Pre-Training Framework for Language Understanding. In AAAI. 8968--8975.","DOI":"10.1609\/aaai.v34i05.6428"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301281"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.14778\/3457390.3457391"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Ian Tenney Dipanjan Das and Ellie Pavlick. 2019. BERT Rediscovers the Classical NLP Pipeline. In ACL. 4593--4601.","DOI":"10.18653\/v1\/P19-1452"},{"key":"e_1_3_2_1_47_1","volume-title":"Semantic Labeling Using a Deep Contextualized Language Model. arxiv","author":"Trabelsi Mohamed","year":"2010","unstructured":"Mohamed Trabelsi, Jin Cao, and Jeff Heflin. 2020. Semantic Labeling Using a Deep Contextualized Language Model. arxiv: 2010.16037 [cs.LG]"},{"key":"e_1_3_2_1_48_1","volume-title":"Proc. NIPS '17","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Proc. NIPS '17. 5998--6008."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.14778\/2002938.2002939"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_51_1","volume-title":"Bowman","author":"Wang Alex","year":"2019","unstructured":"Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel R. Bowman. 2019. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In ICLR."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450090"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467434"},{"key":"e_1_3_2_1_54_1","volume-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. Association for Computational Linguistics, Online, 38--45","author":"Wolf Thomas","year":"2020","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Pierric Cistac, Tim Rault, R\u00e9mi Louf, Morgan Funtowicz, Joe Davison, Sam Shleifer, Patrick von Platen, Clara Ma, Yacine Jernite, Julien Plu, Canwen Xu, Teven Le Scao, Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. 2020. Transformers: State-of-the-Art Natural Language Processing. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. Association for Computational Linguistics, Online, 38--45. https:\/\/www.aclweb.org\/anthology\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_55_1","volume-title":"Deep transformer models for time series forecasting: The influenza prevalence case. arXiv preprint arXiv:2001.08317","author":"Wu Neo","year":"2020","unstructured":"Neo Wu, Bradley Green, Xue Ben, and Shawn O'Banion. 2020. Deep transformer models for time series forecasting: The influenza prevalence case. arXiv preprint arXiv:2001.08317 (2020)."},{"key":"e_1_3_2_1_56_1","volume-title":"Trace Norm Regularised Deep Multi-Task Learning. In ICLR '17 Workshop Track .","author":"Yang Yongxin","unstructured":"Yongxin Yang and Timothy M. Hospedales. 2017. Trace Norm Regularised Deep Multi-Task Learning. In ICLR '17 Workshop Track ."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.3115\/1614164.1614177"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000008"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407793"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/1989323.1989336"}],"event":{"name":"SIGMOD\/PODS '22: International Conference on Management of Data","location":"Philadelphia PA USA","acronym":"SIGMOD\/PODS '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2022 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3517906","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3514221.3517906","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:10:06Z","timestamp":1750183806000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3517906"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,10]]},"references-count":61,"alternative-id":["10.1145\/3514221.3517906","10.1145\/3514221"],"URL":"https:\/\/doi.org\/10.1145\/3514221.3517906","relation":{},"subject":[],"published":{"date-parts":[[2022,6,10]]},"assertion":[{"value":"2022-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}