# airflow-docker-compose.yaml
version: '3'

# Shared settings for the Airflow containers. Services pull this mapping in
# with `<<: *airflow-common`; the merge is shallow, so a service that declares
# its own `environment` must re-merge `*airflow-common-env` itself.
x-airflow-common: &airflow-common
  # Image is built locally from ./airflow-dockerfile.
  build:
    context: .
    dockerfile: airflow-dockerfile
  environment: &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    # Metadata DB and Celery result backend both point at the `postgres`
    # service; the Celery broker is the `redis` service.
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: 'postgresql+psycopg2://airflow:airflow@postgres/airflow'
    AIRFLOW__CELERY__RESULT_BACKEND: 'db+postgresql://airflow:airflow@postgres/airflow'
    AIRFLOW__CELERY__BROKER_URL: 'redis://:@redis:6379/0'
    # NOTE(review): the Fernet key is left empty; per the Airflow docs this
    # presumably means stored connection secrets are not encrypted — confirm
    # this is acceptable outside local development.
    AIRFLOW__CORE__FERNET_KEY: ''
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
    AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
    # Scan the DAGs folder for new files every 60 seconds.
    AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: '60'
    # Values under `environment` take precedence over `env_file`, so a literal
    # here could never be overridden from ./.env. Read the key from the
    # environment instead and fall back to the previous value so existing
    # deployments keep working. Set AIRFLOW__WEBSERVER__SECRET_KEY to a private
    # value for anything beyond local use.
    AIRFLOW__WEBSERVER__SECRET_KEY: '${AIRFLOW__WEBSERVER__SECRET_KEY:-3d6f45a5fc12445dbac2f59c3b6c7cb1}'
    # Re-parse each DAG file at most once every 60 seconds instead of
    # reloading the DAGs continuously.
    AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL: '60'
    # Webserver worker count; the usual sizing rule is 2 * NUM_CPU_CORES + 1.
    AIRFLOW__WEBSERVER__WORKERS: '2'
    # Kill webserver workers that do not start within 5 min instead of 2 min.
    AIRFLOW__WEBSERVER__WEB_SERVER_WORKER_TIMEOUT: '300'
    # Airflow connection `postgres_tweets`.
    # NOTE(review): host `database` is not a service in this file — presumably
    # it is provided by another stack reachable from these containers; verify.
    AIRFLOW_CONN_POSTGRES_TWEETS: 'postgresql+psycopg2://postgres:postgres@database/tweets'
  # DAGs, logs and plugins are bind-mounted from the project directory.
  volumes:
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
    - ./plugins:/opt/airflow/plugins
  env_file:
    - ./.env
  # Run as the host user when AIRFLOW_UID / AIRFLOW_GID are set (default 50000).
  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}"
  mem_limit: 4000m
  # Do not start until the broker and the metadata DB pass their healthchecks.
  depends_on:
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy
  networks:
    - etl_network
services:
  # PostgreSQL instance addressed as host `postgres` by the Airflow
  # connection strings in x-airflow-common.
  postgres:
    image: postgres:13
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    healthcheck:
      test:
        - 'CMD'
        - 'pg_isready'
        - '-U'
        - 'airflow'
      interval: 5s
      retries: 5
    restart: always
    networks:
      - etl_network

  # Redis instance addressed as host `redis` by the Celery broker URL.
  # NOTE(review): the image tag is unpinned and the port is published on the
  # host — confirm both are intended.
  redis:
    image: redis:latest
    ports:
      - '6379:6379'
    healthcheck:
      test:
        - 'CMD'
        - 'redis-cli'
        - 'ping'
      interval: 5s
      timeout: 30s
      retries: 50
    restart: always
    mem_limit: 4000m
    networks:
      - etl_network

  # Airflow web UI, published on host port 8080.
  airflow-webserver:
    <<: *airflow-common
    command: webserver
    ports:
      - '8080:8080'
    healthcheck:
      test:
        - 'CMD'
        - 'curl'
        - '--fail'
        - 'http://localhost:8080/health'
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  # Airflow scheduler. `$$` escapes `$` for Compose, so the shell inside the
  # container expands ${HOSTNAME}.
  airflow-scheduler:
    <<: *airflow-common
    command: scheduler
    healthcheck:
      test:
        - 'CMD-SHELL'
        - >-
          airflow jobs check --job-type SchedulerJob --hostname
          "$${HOSTNAME}"
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  # Celery worker; the healthcheck pings this container's own worker node.
  airflow-worker:
    <<: *airflow-common
    command: celery worker
    healthcheck:
      test:
        - 'CMD-SHELL'
        - >-
          celery --app airflow.executors.celery_executor.app inspect ping -d
          "celery@$${HOSTNAME}"
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  # Initialisation container: no restart policy, runs `version`.
  # The `_AIRFLOW_*` variables are presumably consumed by the image entrypoint
  # to upgrade the DB and create the web user — verify against
  # airflow-dockerfile. Declaring `environment` here replaces the merged one,
  # hence the explicit re-merge of *airflow-common-env.
  airflow-init:
    <<: *airflow-common
    command: version
    environment:
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}

  # Flower (Celery monitoring UI), published on host port 5555.
  flower:
    <<: *airflow-common
    command: celery flower
    ports:
      - '5555:5555'
    healthcheck:
      test:
        - 'CMD'
        - 'curl'
        - '--fail'
        - 'http://localhost:5555/'
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always
    # Same value as the one inherited from airflow-common.
    mem_limit: 4000m
# Named volume mounted by the postgres service at /var/lib/postgresql/data.
volumes:
  postgres-db-volume: null
# Custom network that connects all containers of this stack.
networks:
  etl_network: null