5

I am trying to set up Apache Airflow in Docker for local development. I have it working on an Intel-based Mac.

I am following their official documentation. When I try to run the services with docker compose up, I see the following traceback.

# docker-compose.yaml
---
# Local-development Compose file for Apache Airflow (CeleryExecutor),
# based on the official apache/airflow 2.1.0 docker-compose template.
version: '3'

# Shared settings merged into every Airflow service below via the
# &airflow-common anchor and <<: merge key.
x-airflow-common:
  &airflow-common
  image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.1.0}
  environment:
    &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
    AIRFLOW__CORE__FERNET_KEY: ''
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'true'
    AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
  volumes:
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
    - ./plugins:/opt/airflow/plugins
  # Run as the host UID/GID so files written to the bind mounts above
  # remain editable from the host.
  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}"
  depends_on:
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy

services:
  # Metadata database backing Airflow and the Celery result backend.
  postgres:
    image: postgres:13
#    platform: linux/amd64
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 5s
      retries: 5
    restart: always

  # Celery message broker. NOTE(review): consider pinning a specific tag
  # instead of :latest for reproducible local environments.
  redis:
    image: redis:latest
    ports:
      # Port mappings are quoted: unquoted digit-and-colon scalars can be
      # misparsed as YAML 1.1 sexagesimal integers.
      - "6379:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 30s
      retries: 50
    restart: always

  airflow-webserver:
    <<: *airflow-common
    command: webserver
    ports:
      - "8080:8080"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  airflow-scheduler:
    <<: *airflow-common
    command: scheduler
    healthcheck:
      # $$ escapes the dollar sign so Compose passes ${HOSTNAME} through
      # to the container's shell instead of interpolating it itself.
      test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"']
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  airflow-worker:
    <<: *airflow-common
    command: celery worker
    healthcheck:
      test:
        - "CMD-SHELL"
        - 'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  # One-shot service: upgrades the metadata DB and creates the first
  # web UI user, then exits ("version" is the container command).
  airflow-init:
    <<: *airflow-common
    command: version
    environment:
      # Merge key is shallow: keys listed here override the merged-in
      # &airflow-common-env values.
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}

  # Celery monitoring UI.
  flower:
    <<: *airflow-common
    command: celery flower
    ports:
      - "5555:5555"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

volumes:
  postgres-db-volume:

Here's the traceback of docker compose up

[+] Running 7/7
 ⠿ Container test_redis_1              Recreated                                                                                                                                                                                         1.2s
 ⠿ Container test_postgres_1           Recreated                                                                                                                                                                                         0.2s
 ⠿ Container test_airflow-worker_1     Created                                                                                                                                                                                           0.1s
 ⠿ Container test_airflow-init_1       Recreated                                                                                                                                                                                         0.2s
 ⠿ Container test_airflow-webserver_1  Created                                                                                                                                                                                           0.1s
 ⠿ Container test_flower_1             Created                                                                                                                                                                                           0.1s
 ⠿ Container test_airflow-scheduler_1  Created                                                                                                                                                                                           0.1s
Attaching to airflow-init_1, airflow-scheduler_1, airflow-webserver_1, airflow-worker_1, flower_1, postgres_1, redis_1
postgres_1           | 
postgres_1           | PostgreSQL Database directory appears to contain a database; Skipping initialization
postgres_1           | 
postgres_1           | 2021-06-05 19:35:20.148 UTC [1] LOG:  starting PostgreSQL 13.3 (Debian 13.3-1.pgdg100+1) on aarch64-unknown-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit
postgres_1           | 2021-06-05 19:35:20.148 UTC [1] LOG:  listening on IPv4 address "0.0.0.0", port 5432
postgres_1           | 2021-06-05 19:35:20.148 UTC [1] LOG:  listening on IPv6 address "::", port 5432
postgres_1           | 2021-06-05 19:35:20.150 UTC [1] LOG:  listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432"
postgres_1           | 2021-06-05 19:35:20.154 UTC [27] LOG:  database system was shut down at 2021-06-05 19:35:16 UTC
postgres_1           | 2021-06-05 19:35:20.158 UTC [1] LOG:  database system is ready to accept connections
redis_1              | 1:C 05 Jun 2021 19:35:21.647 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
redis_1              | 1:C 05 Jun 2021 19:35:21.647 # Redis version=6.2.4, bits=64, commit=00000000, modified=0, pid=1, just started
redis_1              | 1:C 05 Jun 2021 19:35:21.647 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf
redis_1              | 1:M 05 Jun 2021 19:35:21.647 * monotonic clock: POSIX clock_gettime
redis_1              | 1:M 05 Jun 2021 19:35:21.647 * Running mode=standalone, port=6379.
redis_1              | 1:M 05 Jun 2021 19:35:21.647 # Server initialized
redis_1              | 1:M 05 Jun 2021 19:35:21.649 * Ready to accept connections
airflow-init_1       | BACKEND=postgresql+psycopg2
airflow-init_1       | DB_HOST=postgres
airflow-init_1       | DB_PORT=5432
airflow-init_1       | 
airflow-scheduler_1  | BACKEND=postgresql+psycopg2
airflow-scheduler_1  | DB_HOST=postgres
airflow-scheduler_1  | DB_PORT=5432
airflow-scheduler_1  | 
airflow-scheduler_1  | BACKEND=postgresql+psycopg2
airflow-scheduler_1  | DB_HOST=postgres
airflow-scheduler_1  | DB_PORT=5432
airflow-scheduler_1  | 
airflow-worker_1     | BACKEND=postgresql+psycopg2
airflow-worker_1     | DB_HOST=postgres
airflow-worker_1     | DB_PORT=5432
flower_1             | BACKEND=postgresql+psycopg2
flower_1             | DB_HOST=postgres
flower_1             | DB_PORT=5432
airflow-worker_1     | 
airflow-worker_1     | BACKEND=postgresql+psycopg2
airflow-worker_1     | DB_HOST=postgres
airflow-worker_1     | DB_PORT=5432
flower_1             | 
flower_1             | BACKEND=postgresql+psycopg2
flower_1             | DB_HOST=postgres
flower_1             | DB_PORT=5432
airflow-webserver_1  | BACKEND=postgresql+psycopg2
airflow-webserver_1  | DB_HOST=postgres
airflow-webserver_1  | DB_PORT=5432
airflow-worker_1     | 
flower_1             | 
airflow-webserver_1  | 
airflow-init_1       | DB: postgresql+psycopg2://airflow:***@postgres/airflow
airflow-init_1       | [2021-06-05 19:35:59,163] {db.py:695} INFO - Creating tables
airflow-init_1       | INFO  [alembic.runtime.migration] Context impl PostgresqlImpl.
airflow-init_1       | INFO  [alembic.runtime.migration] Will assume transactional DDL.
flower_1             | [2021-06-05 19:36:08,037] {command.py:137} INFO - Visit me at http://0.0.0.0:5555
flower_1             | [2021-06-05 19:36:08,359] {command.py:142} INFO - Broker: redis://redis:6379/0
flower_1             | [2021-06-05 19:36:08,391] {command.py:145} INFO - Registered tasks: 
flower_1             | ['airflow.executors.celery_executor.execute_command',
flower_1             |  'celery.accumulate',
flower_1             |  'celery.backend_cleanup',
flower_1             |  'celery.chain',
flower_1             |  'celery.chord',
flower_1             |  'celery.chord_unlock',
flower_1             |  'celery.chunks',
flower_1             |  'celery.group',
flower_1             |  'celery.map',
flower_1             |  'celery.starmap']
flower_1             | [2021-06-05 19:36:08,666] {mixins.py:229} INFO - Connected to redis://redis:6379/0
flower_1             | [2021-06-05 19:36:11,593] {inspector.py:42} WARNING - Inspect method scheduled failed
flower_1             | [2021-06-05 19:36:11,609] {inspector.py:42} WARNING - Inspect method conf failed
flower_1             | [2021-06-05 19:36:11,617] {inspector.py:42} WARNING - Inspect method reserved failed
flower_1             | [2021-06-05 19:36:11,619] {inspector.py:42} WARNING - Inspect method registered failed
flower_1             | [2021-06-05 19:36:11,655] {inspector.py:42} WARNING - Inspect method active_queues failed
flower_1             | [2021-06-05 19:36:11,659] {inspector.py:42} WARNING - Inspect method stats failed
flower_1             | [2021-06-05 19:36:11,662] {inspector.py:42} WARNING - Inspect method revoked failed
flower_1             | [2021-06-05 19:36:11,664] {inspector.py:42} WARNING - Inspect method active failed
airflow-scheduler_1  |   ____________       _____________
airflow-scheduler_1  |  ____    |__( )_________  __/__  /________      __
airflow-scheduler_1  | ____  /| |_  /__  ___/_  /_ __  /_  __ \_ | /| / /
airflow-scheduler_1  | ___  ___ |  / _  /   _  __/ _  / / /_/ /_ |/ |/ /
airflow-scheduler_1  |  _/_/  |_/_/  /_/    /_/    /_/  \____/____/|__/
airflow-scheduler_1  | [2021-06-05 19:36:19,515] {scheduler_job.py:1253} INFO - Starting the scheduler
airflow-scheduler_1  | [2021-06-05 19:36:19,527] {scheduler_job.py:1258} INFO - Processing each file at most -1 times
airflow-scheduler_1  | [2021-06-05 19:36:19,588] {dag_processing.py:254} INFO - Launched DagFileProcessorManager with pid: 107
airflow-scheduler_1  | [2021-06-05 19:36:19,604] {scheduler_job.py:1822} INFO - Resetting orphaned tasks for active dag runs
airflow-scheduler_1  | [2021-06-05 19:36:19,657] {settings.py:52} INFO - Configured default timezone Timezone('UTC')
airflow-worker_1     | Starting flask
airflow-worker_1     |  * Serving Flask app "airflow.utils.serve_logs" (lazy loading)
airflow-worker_1     |  * Environment: production
airflow-worker_1     |    WARNING: This is a development server. Do not use it in a production deployment.
airflow-worker_1     |    Use a production WSGI server instead.
airflow-worker_1     |  * Debug mode: off
airflow-worker_1     | [2021-06-05 19:36:28,740] {_internal.py:113} INFO -  * Running on http://0.0.0.0:8793/ (Press CTRL+C to quit)
airflow-init_1       | Upgrades done
airflow-worker_1     | /home/airflow/.local/lib/python3.6/site-packages/celery/platforms.py:801 RuntimeWarning: You're running the worker with superuser privileges: this is
airflow-worker_1     | absolutely not recommended!
airflow-worker_1     | 
airflow-worker_1     | Please specify a different user using the --uid option.
airflow-worker_1     | 
airflow-worker_1     | User information: uid=501 euid=501 gid=0 egid=0
airflow-worker_1     | 
airflow-webserver_1  |   ____________       _____________
airflow-webserver_1  |  ____    |__( )_________  __/__  /________      __
airflow-webserver_1  | ____  /| |_  /__  ___/_  /_ __  /_  __ \_ | /| / /
airflow-webserver_1  | ___  ___ |  / _  /   _  __/ _  / / /_/ /_ |/ |/ /
airflow-webserver_1  |  _/_/  |_/_/  /_/    /_/    /_/  \____/____/|__/
airflow-webserver_1  | [2021-06-05 19:36:32,432] {dagbag.py:487} INFO - Filling up the DagBag from /dev/null
postgres_1           | 2021-06-05 19:36:38.577 UTC [150] LOG:  unexpected EOF on client connection with an open transaction
airflow-worker_1     | [2021-06-05 19:36:39,670: INFO/MainProcess] Connected to redis://redis:6379/0
airflow-worker_1     | [2021-06-05 19:36:39,819: INFO/MainProcess] mingle: searching for neighbors
airflow-webserver_1 exited with code 137
airflow-worker_1     | [2021-06-05 19:36:41,084: INFO/MainProcess] mingle: all alone
airflow-worker_1     | [2021-06-05 19:36:41,196: INFO/MainProcess] celery@a2177ac7a506 ready.
postgres_1           | 2021-06-05 19:36:43.263 UTC [131] LOG:  unexpected EOF on client connection with an open transaction
airflow-worker_1     | [2021-06-05 19:36:43,502: INFO/MainProcess] Events of group {task} enabled by remote.
postgres_1           | 2021-06-05 19:36:48.829 UTC [132] LOG:  unexpected EOF on client connection with an open transaction
flower_1 exited with code 137
airflow-worker_1 exited with code 137

And then it goes into a loop of retries. As an experiment, when I tried running the services one by one, I observed that as soon as I start airflow-worker, all the other services start failing. The airflow-webserver workers die immediately and try to respawn again, but cannot.

2
  • M1 Macs are still a new thing. I would guess that it's more of a Docker/runtime issue than an Airflow one — the idea of Docker was to make one compose file work everywhere, and it seems that's not yet true on M1. Still, I would consider creating a bug report in the Airflow repo so we can track this issue in the future. Commented Jun 5, 2021 at 22:46
  • 1
    Somehow it was fixed by allocating more resources to the docker, could be because of insufficient memory. There is already an issue open on airflow regarding M1 arm64 image, where someone mentioned the same fix.
    – Sadan A.
    Commented Jun 6, 2021 at 23:04

1 Answer 1

6

I had the same issue. Increasing the allocation of resources in docker preference fixed the issue for me. CPU 2 -> 4, Memory 2GB -> 6GB, SWAP 1GB -> 2GB.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.