{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:49:21Z","timestamp":1755794961121,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":13,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3737207","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:04:26Z","timestamp":1754255066000},"page":"4406-4413","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Cross-Domain Web Information Extraction at Pinterest"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6474-1911","authenticated-orcid":false,"given":"Michael","family":"Farag","sequence":"first","affiliation":[{"name":"Pinterest, Toronto, ON, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2785-1066","authenticated-orcid":false,"given":"Patrick","family":"Halina","sequence":"additional","affiliation":[{"name":"Pinterest, Toronto, ON, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1304-5219","authenticated-orcid":false,"given":"Andrey","family":"Zaytsev","sequence":"additional","affiliation":[{"name":"Pinterest, San Francisco, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4088-425X","authenticated-orcid":false,"given":"Alekhya","family":"Munagala","sequence":"additional","affiliation":[{"name":"Pinterest, Toronto, ON, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1045-5458","authenticated-orcid":false,"given":"Imtihan","family":"Ahmed","sequence":"additional","affiliation":[{"name":"Pinterest, Toronto, ON, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7357-429X","authenticated-orcid":false,"given":"Junhao","family":"Wang","sequence":"additional","affiliation":[{"name":"Pinterest, Toronto, ON, Canada"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_2_1_1","first-page":"121","volume-title":"OmniSearchSage: Multi-Task Multi-Entity Embeddings for Pinterest Search. In Companion Proceedings of the ACM on Web Conference","author":"Agarwal Prabhat","year":"2024","unstructured":"Prabhat Agarwal, Minhazul Islam Sk, Nikil Pancha, Kurchi Subhra Hazra, Jiajing Xu, and Chuck Rosenberg. 2024. OmniSearchSage: Multi-Task Multi-Entity Embeddings for Pinterest Search. In Companion Proceedings of the ACM on Web Conference 2024. 121-130."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27828"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539170"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2014.07.007"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548112"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00100-9"},{"key":"e_1_3_2_2_7_1","volume-title":"Markuplm: Pre-training of text and markup language for visually-rich document understanding. arXiv preprint arXiv:2110.08518(2021).","author":"Li Junlong","year":"2021","unstructured":"Junlong Li, Yiheng Xu, Lei Cui, and Furu Wei. 2021. Markuplm: Pre-training of text and markup language for visually-rich document understanding. arXiv preprint arXiv:2110.08518(2021)."},{"key":"e_1_3_2_2_8_1","first-page":"409","article-title":"Research on Adaptive Wrapper in Deep Web Data Extraction","volume":"9502","author":"Liu Donglan","year":"2015","unstructured":"Donglan Liu, Lei Ma, and Xin Liu. 2015. Research on Adaptive Wrapper in Deep Web Data Extraction. In IOV, Vol. 9502. 409-423.","journal-title":"IOV"},{"key":"e_1_3_2_2_9_1","first-page":"562","article-title":"Practical Web Data Extraction: Are We There Yet? - A Short Survey","author":"Schulz Andreas","year":"2016","unstructured":"Andreas Schulz, J\u00f6rg L\u00e4ssig, and Martin Gaedke. 2016. Practical Web Data Extraction: Are We There Yet? - A Short Survey. In ICWI. 562-567.","journal-title":"ICWI."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2012.135"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.21105\/joss.03557"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599918"},{"key":"e_1_3_2_2_13_1","unstructured":"Yichao Zhou Ying Sheng Nguyen Vo Nick Edmonds and Sandeep Tata. 2021. Simplified dom trees for transferable attribute extraction from the web. arXiv preprint arXiv:2101.02415(2021)."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3737207","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T14:44:02Z","timestamp":1755355442000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3737207"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":13,"alternative-id":["10.1145\/3711896.3737207","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3737207","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}