{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T17:59:58Z","timestamp":1772906398216,"version":"3.50.1"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,15]],"date-time":"2024-12-15T00:00:00Z","timestamp":1734220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,15]],"date-time":"2024-12-15T00:00:00Z","timestamp":1734220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,15]]},"DOI":"10.1109\/bigdata62323.2024.10825377","type":"proceedings-article","created":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T18:31:23Z","timestamp":1737052283000},"page":"3562-3567","source":"Crossref","is-referenced-by-count":3,"title":["FaaS and Furious: abstractions and differential caching for efficient data pre-processing"],"prefix":"10.1109","author":[{"given":"Jacopo","family":"Tagliabue","sequence":"first","affiliation":[{"name":"Bauplan Labs,New York,US"}]},{"given":"Ryan","family":"Curtin","sequence":"additional","affiliation":[{"name":"Bauplan Labs,Atlanta,US"}]},{"given":"Ciro","family":"Greco","sequence":"additional","affiliation":[{"name":"Bauplan Labs,New York,US"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Global big data & analytics market by component","year":"2024"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445518"},{"key":"ref3","article-title":"Lakehouse: A new generation of open platforms that unify data warehousing and advanced analytics","author":"Zaharia","year":"2021","journal-title":"Conference on Innovative Data Systems Research"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407807"},{"key":"ref5","article-title":"Operationalizing machine learning: An interview study","author":"Shankar","year":"2022"},{"key":"ref6","article-title":"Reasonable scale machine learning with open-source metaflow","author":"Tagliabue","year":"2023"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474604"},{"key":"ref8","article-title":"Global data pipeline tools market by component","year":"2024"},{"key":"ref9","article-title":"Airflow","year":"2024"},{"key":"ref10","article-title":"Luigi","year":"2024"},{"key":"ref11","article-title":"Parquet","year":"2024"},{"key":"ref12","article-title":"Iceberg","year":"2024"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3650203.3663335"},{"key":"ref14","article-title":"Building a serverless data lakehouse from spare parts","author":"Tagliabue","year":"2023"},{"key":"ref15","article-title":"Bauplan: zero-copy, scale-up faas for data pipelines","volume-title":"Proceedings of the 25th International Middleware Conference: Demos, Posters and Doctoral Symposium, ser. Middleware \u201924","author":"Tagliabue"},{"key":"ref16","article-title":"Build end-to-end machine learning workflows with amazon sagemaker and apache airflow","author":"Thallam","year":"2024"},{"key":"ref17","article-title":"Arrow","year":"2024"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3626246.3653395"},{"key":"ref19","article-title":"How developers iterate on machine learning workflows - a survey of the applied machine learning literature","author":"Xin","year":"2018"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.14778\/3681954.3682031"},{"key":"ref21","article-title":"Sqlglot","year":"2024"},{"key":"ref22","article-title":"dbt-core","year":"2024"},{"key":"ref23","first-page":"330","article-title":"Semantic data caching and replacement","volume-title":"Proceedings of the 22th International Conference on Very Large Data Bases, ser. VLDB \u201996","author":"Dar"},{"key":"ref24","first-page":"901","article-title":"Data caching for Enterprise-Grade Petabyte-Scale OLAP","volume-title":"2024 USENIX Annual Technical Conference (USENIX ATC 24)","author":"Tang"},{"key":"ref25","article-title":"Differential storage: A key building block for a duckdb-based data warehouse","author":"Hwang","year":"2024"}],"event":{"name":"2024 IEEE International Conference on Big Data (BigData)","location":"Washington, DC, USA","start":{"date-parts":[[2024,12,15]]},"end":{"date-parts":[[2024,12,18]]}},"container-title":["2024 IEEE International Conference on Big Data (BigData)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10824975\/10824942\/10825377.pdf?arnumber=10825377","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,17]],"date-time":"2025-01-17T08:27:43Z","timestamp":1737102463000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10825377\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,15]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/bigdata62323.2024.10825377","relation":{},"subject":[],"published":{"date-parts":[[2024,12,15]]}}}