{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,16]],"date-time":"2025-04-16T10:49:49Z","timestamp":1744800589682,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":27,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783662661109"},{"type":"electronic","value":"9783662661116"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-662-66111-6_4","type":"book-chapter","created":{"date-parts":[[2022,10,7]],"date-time":"2022-10-07T07:05:19Z","timestamp":1665126319000},"page":"97-118","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Efficiently Identifying Disguised Missing Values in\u00a0Heterogeneous, Text-Rich Data"],"prefix":"10.1007","author":[{"given":"Th\u00e9o","family":"Bouganim","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Helena","family":"Galhardas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ioana","family":"Manolescu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,10,8]]},"reference":[{"issue":"12","key":"4_CR1","doi-asserted-by":"publisher","first-page":"993","DOI":"10.14778\/2994509.2994518","volume":"9","author":"Z Abedjan","year":"2016","unstructured":"Abedjan, Z., et al.: Detecting data errors: where are we and what needs to be done? Proc. VLDB Endow. 9(12), 993\u20131004 (2016)","journal-title":"Proc. VLDB Endow."},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Abedjan, Z., Golab, L., Naumann, F., Papenbrock., T.: Data Profiling. Morgan and Claypool (2020)","DOI":"10.1007\/978-3-031-01865-7"},{"key":"4_CR3","volume-title":"Foundations of Databases","author":"S Abiteboul","year":"1995","unstructured":"Abiteboul, S., Hull, R., Vianu, V.: Foundations of Databases. Addison-Wesley, Boston (1995)"},{"key":"4_CR4","unstructured":"Akbik, A., Bergmann, T., Blythe, D., Rasul, K., Schweter, S., Vollgraf, R.: Flair: an easy-to-use framework for state-of-the-art NLP. In: ACL (2019)"},{"key":"4_CR5","unstructured":"Akbik, A., Blythe, D., Vollgraf, R.: Contextual string embeddings for sequence labeling. In: ACL (2018)"},{"key":"4_CR6","unstructured":"Anadiotis, A.-C., et al.: Empowering Investigative Journalism with Graph-based Heterogeneous Data Management. Bull. Tech. Committee Data Eng. (2021)"},{"key":"4_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.is.2021.101846","volume":"104","author":"AC Anadiotis","year":"2021","unstructured":"Anadiotis, A.C., et al.: Graph integration of structured, semistructured and unstructured data for data journalism. Inf. Syst. 104, 101846 (2021)","journal-title":"Inf. Syst."},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Anadiotis, A.-C.G., et al.: Discovering conflicts of interest across heterogeneous data sources with connectionlens. In: CIKM 2021: The 30th ACM International Conference on Information and Knowledge Management, Virtual Event, Queensland, Australia, November 1\u20135, 2021, pp. 4670\u20134674. ACM (2021)","DOI":"10.1145\/3459637.3481982"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Bonaque, R., et al.: Mixed-instance querying: a lightweight integration architecture for data journalism. In: VLDB (2016)","DOI":"10.14778\/3007263.3007297"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Bouganim, T., Galhardas, H., Manolescu, I.: Efficiently identifying disguised nulls in heterogeneous text data. In: BDA (Conf\u00e9rence sur la Gestion de Donn\u00e9es - Principles, Technologies et Applications), Paris, France, October 2021. Informal publication only (2021)","DOI":"10.1007\/978-3-662-66111-6_4"},{"issue":"1","key":"4_CR11","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L.: Random forests. Mach. Learn. 45(1), 5\u201332 (2001)","journal-title":"Mach. Learn."},{"issue":"12","key":"4_CR12","first-page":"4","volume":"11","author":"C Chanial","year":"2018","unstructured":"Chanial, C., et al.: ConnectionLens: finding connections across heterogeneous data sources (demonstration). PVLDB (also at BDA) 11(12), 4 (2018)","journal-title":"PVLDB (also at BDA)"},{"key":"4_CR13","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1145\/362384.362685","volume":"13","author":"EF Codd","year":"1970","unstructured":"Codd, E.F.: A relational model of data for large shared data banks. Commun. ACM 13, 377\u2013387 (1970)","journal-title":"Commun. ACM"},{"key":"4_CR14","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. In: ACL (2019)"},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Galhardas, H., Florescu, D., Shasha, D.E., Simon, E.: Declaratively cleaning your data with AJAX. In: Doucet, A. (ed.) BDA (2000)","DOI":"10.1145\/342009.336568"},{"key":"4_CR16","unstructured":"Galhardas, H., Florescu, D., Shasha, D.E., Simon, E., Saita, C.-A.: Declarative data cleaning: language, model, and algorithms. In: VLDB (2001)"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Hammer, J., Garcia-Molina, H., Ireland, K., Papakonstantinou, Y., Ullman, J.D., Widom, J.: Information translation, mediation, and mosaic-based browsing in the TSIMMIS system. In: SIGMOD (1995)","DOI":"10.1145\/223784.223896"},{"key":"4_CR18","volume-title":"Data Mining Concepts and Techniques","author":"J Han","year":"2011","unstructured":"Han, J., Kamber, M., Pei, J.: Data Mining Concepts and Techniques. Morgan Kaufmann, Waltham (2011)"},{"key":"4_CR19","doi-asserted-by":"crossref","unstructured":"Hua, M., Pei, J.: Cleaning disguised missing data: a heuristic approach. In: SIGKDD (2007)","DOI":"10.1145\/1281192.1281294"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Ilyas, I.F., Soliman, M.A.: Probabilistic ranking techniques in relational databases. In: Synthesis Lectures on Data Management. Morgan & Claypool Publishers (2011)","DOI":"10.1007\/978-3-031-01846-6"},{"key":"4_CR21","unstructured":"Little, R.J.A., Rubin, D.B.: Statistical Analysis with Missing Data, vol. 793, 1st ed.. Wiley, New York (1987)"},{"issue":"12","key":"4_CR22","doi-asserted-by":"publisher","first-page":"1948","DOI":"10.14778\/3407790.3407801","volume":"13","author":"M Mahdavi","year":"2020","unstructured":"Mahdavi, M., Abedjan, Z.: Baran: Effective error correction via a unified context representation and transfer learning. Proc. VLDB Endow. 13(12), 1948\u20131961 (2020)","journal-title":"Proc. VLDB Endow."},{"key":"4_CR23","unstructured":"Mahdavi, M., et al.: A configuration-free error detection system. In: Proceedings of the 2019 International Conference on Management of Data, SIGMOD 2019, New York, NY, USA, 2019, pp. 865\u2013882. Association for Computing Machinery (2019)"},{"issue":"1","key":"4_CR24","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1145\/1147234.1147247","volume":"8","author":"RK Pearson","year":"2006","unstructured":"Pearson, R.K.: The problem of disguised missing data. SIGKDD Explor. Newsl. 8(1), 83\u201392 (2006)","journal-title":"SIGKDD Explor. Newsl."},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Qahtan, A.A., Elmagarmid, A., Fernandez, R.C., Ouzzani, M., Tang, N.: Fahes: a robust disguised missing values detector. In: SIGKDD (2018)","DOI":"10.1145\/3219819.3220109"},{"key":"4_CR26","unstructured":"Raman, V., Hellerstein, J.M.: Potter\u2019s wheel: an interactive data cleaning system. In: VLDB (2001)"},{"key":"4_CR27","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: Sentence embeddings using Siamese Bert-networks. In: EMNLP (2019)","DOI":"10.18653\/v1\/D19-1410"}],"container-title":["Lecture Notes in Computer Science","Transactions on Large-Scale Data- and Knowledge-Centered Systems LI"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-66111-6_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,22]],"date-time":"2023-02-22T08:57:13Z","timestamp":1677056233000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-662-66111-6_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783662661109","9783662661116"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-66111-6_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"8 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}