From dad8a079ee149c870674cd5618d87fea214084fc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 18 Oct 2024 12:07:58 +0000 Subject: [PATCH] Deployed 1ffe774 with MkDocs version: 1.6.1 --- .nojekyll | 0 404.html | 500 ++ FAQ/index.html | 605 ++ api_reference/DDataflow/index.html | 2964 ++++++++ api_reference/DataSource/index.html | 1136 +++ api_reference/DataSourceDownloader/index.html | 908 +++ api_reference/DataSources/index.html | 759 ++ assets/_mkdocstrings.css | 143 + assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.83f73b43.min.js | 16 + assets/javascripts/bundle.83f73b43.min.js.map | 7 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + 
assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++++++++++++ .../workers/search.6ce7567c.min.js | 42 + .../workers/search.6ce7567c.min.js.map | 7 + assets/stylesheets/main.0253249f.min.css | 1 + assets/stylesheets/main.0253249f.min.css.map | 1 + assets/stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + ddataflow.png | Bin 0 -> 163415 bytes index.html | 736 ++ local_development/index.html | 635 ++ objects.inv | Bin 0 -> 473 bytes sampling/index.html | 635 ++ search/search_index.json | 1 + sitemap.xml | 39 + sitemap.xml.gz | Bin 0 -> 282 bytes troubleshooting/index.html | 540 ++ 60 files changed, 16878 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 FAQ/index.html create mode 100644 api_reference/DDataflow/index.html create mode 100644 api_reference/DataSource/index.html create mode 100644 api_reference/DataSourceDownloader/index.html create mode 100644 api_reference/DataSources/index.html create mode 100644 assets/_mkdocstrings.css create mode 100644 assets/images/favicon.png create mode 100644 assets/javascripts/bundle.83f73b43.min.js create mode 100644 assets/javascripts/bundle.83f73b43.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 
assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js.map create mode 100644 assets/stylesheets/main.0253249f.min.css create mode 100644 assets/stylesheets/main.0253249f.min.css.map create mode 100644 assets/stylesheets/palette.06af60db.min.css create mode 
100644 assets/stylesheets/palette.06af60db.min.css.map create mode 100644 ddataflow.png create mode 100644 index.html create mode 100644 local_development/index.html create mode 100644 objects.inv create mode 100644 sampling/index.html create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create mode 100644 troubleshooting/index.html diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/404.html b/404.html new file mode 100644 index 0000000..e24e971 --- /dev/null +++ b/404.html @@ -0,0 +1,500 @@ + + + +
+ + + + + + + + + + + + + + +After installing ddataflow, run the configure procedure on the machine where it is installed
+databricks configure --token
+
Follow the wizard until the end.
+ + + + + + + + + + + + + +DDataflow is an end-to-end testing solution. +See our docs manual for more details. +Additionally, use help(ddataflow) to see the available methods.
+ + + + + + +ddataflow/ddataflow.py
22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 |
|
__init__(project_folder_name, data_sources=None, data_writers=None, data_source_size_limit_gb=1, enable_ddataflow=False, sources_with_default_sampling=None, snapshot_path=None, default_sampler=None, default_database=None)
+
+Initialize the dataflow object. +The input of this object is the config dictionary outlined in our integrator manual.
+Important params: +project_folder_name: + the name of the project that will be stored in the disk +snapshot_path: + path to the snapshot folder +data_source_size_limit_gb: + limit the size of the data sources +default_sampler: + options to pass to the default sampler +sources_with_default_sampling: + if you have tables you want to have by default and dont want to sample them first +default_database: + name of the default database. If ddataflow is enabled, a test db will be created and used. +sources_with_default_sampling : + Deprecated: use sources with default_sampling=True instead + if you have tables you want to have by default and dont want to sample them first
+ +ddataflow/ddataflow.py
37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 |
|
current_project()
+
+
+ staticmethod
+
+
+Returns a ddataflow configured with the current directory configuration file +Requirements for this to work:
+@todo investigate if we can use import_class_from_string
+ +ddataflow/ddataflow.py
135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 |
|
disable()
+
+Disable ddataflow overriding tables; in other words, use the production state
+ +ddataflow/ddataflow.py
291 +292 +293 |
|
disable_offline()
+
+Programmatically disable offline mode
+ +ddataflow/ddataflow.py
217 +218 +219 |
|
download_data_sources(overwrite=True, debug=False)
+
+Download the data sources locally for development offline +Note: you need databricks-cli for this command to work
+Options: + overwrite: will first clean the existing files
+ +ddataflow/ddataflow.py
314 +315 +316 +317 +318 +319 +320 +321 +322 |
|
enable()
+
+When enabled ddataflow will read from the filtered data sources +instead of production tables. And write to testing tables instead of production ones.
+ +ddataflow/ddataflow.py
198 +199 +200 +201 +202 +203 +204 |
|
enable_offline()
+
+Programmatically enable offline mode
+ +ddataflow/ddataflow.py
209 +210 +211 +212 |
|
get_mlflow_path(original_path)
+
+overrides the mlflow path if
+ +ddataflow/ddataflow.py
389 +390 +391 +392 +393 +394 +395 +396 +397 +398 |
|
is_enabled()
+
+To be enabled ddataflow has to be either in offline mode or with enable=True
+ +ddataflow/ddataflow.py
409 +410 +411 +412 +413 |
|
name(*args, **kwargs)
+
+A shorthand for source_name
+ +ddataflow/ddataflow.py
285 +286 +287 +288 +289 |
|
path(path)
+
+Returns a deterministic path, replacing the real production path with one based on the current environment needs. +Currently supports paths starting with 'dbfs:/' and 's3://'.
+ +ddataflow/ddataflow.py
249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 |
|
print_status()
+
+Print the status of the ddataflow
+ +ddataflow/ddataflow.py
415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 |
|
read(name)
+
+Read the data writer's parquet files, which are stored in the ddataflow folder
+ +ddataflow/ddataflow.py
358 +359 +360 +361 +362 +363 +364 +365 +366 +367 |
|
sample_and_download(ask_confirmation=True, overwrite=True)
+
+Creates a sample folder in dbfs and then downloads it to the local machine
+ +ddataflow/ddataflow.py
324 +325 +326 +327 +328 +329 +330 +331 |
|
set_logger_level(level)
+
+Set logger level. +Levels can be found here: https://docs.python.org/3/library/logging.html#logging-levels
+ +ddataflow/ddataflow.py
448 +449 +450 +451 +452 +453 +454 |
|
set_up_database(db_name)
+
+Perform USE $DATABASE query to set up a default database. +If ddataflow is enabled, use a test db to prevent writing data into production.
+ +ddataflow/ddataflow.py
264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 |
|
setup_project()
+
+
+ staticmethod
+
+
+Sets up a new ddataflow project with empty data sources in the current directory
+ +ddataflow/ddataflow.py
126 +127 +128 +129 +130 +131 +132 +133 |
|
source(name, debugger=False)
+
+Gives access to the data source configured in the dataflow
+You can also use this function in the terminal with --debugger=True to inspect the dataframe.
+ +ddataflow/ddataflow.py
175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 |
|
source_name(name, disable_view_creation=False)
+
+Given the name of a production table, returns the name of the corresponding ddataflow table when ddataflow is enabled. +If ddataflow is disabled, returns the production one.
+ +ddataflow/ddataflow.py
221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 |
|
write(df, name)
+
+Write a dataframe either to a local folder or the production one
+ +ddataflow/ddataflow.py
333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 |
|
DataSource
+
+
+Utility functions at data source level
+ + + + + + +ddataflow/data_source.py
11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 |
|
estimate_size_and_fail_if_too_big()
+
+Estimate the size of the data source using the _name used in the _config. +It will throw an exception if the estimated size is bigger than the maximum allowed in the configuration.
+ +ddataflow/data_source.py
105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 |
|
query()
+
+query with filter unless none is present
+ +ddataflow/data_source.py
47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 |
|
query_without_filter()
+
+Go to the raw data source without any filtering
+ +ddataflow/data_source.py
64 +65 +66 +67 +68 +69 +70 |
|
DataSourceDownloader
+
+
+ddataflow/downloader.py
8 + 9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 |
|
download_all(data_sources, overwrite=True, debug=False)
+
+Download the data sources locally for development offline +Note: you need databricks-cli for this command to work
+Options: + overwrite: will first clean the existing files
+ +ddataflow/downloader.py
12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 |
|
DataSources
+
+
+Validates and abstracts the access to data sources
+ + + + + + +ddataflow/data_sources.py
6 + 7 + 8 + 9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 |
|