Skip to content

Trino BYOA example

This example shows how to deploy Trino as a SaaS offering in your customers' accounts using BYOA (Bring Your Own Account) mode.

Watch the video

Trino Architecture

Trino is an open-source distributed SQL query engine for big data analytics. It runs interactive analytic queries against data sources of all sizes, ranging from gigabytes to petabytes. Here's a diagram of the Trino architecture we will be deploying:

Trino Architecture

Getting started via compose spec

To get started, use the sample compose spec below to deploy a simple instance of Trino. Note that this is not BYOA mode yet; scroll down to the "Deploy in BYOA mode" section for that.

You can choose your preferred versions of the Postgres, Trino, and Hive images. We use our own custom images, which you can find on our Docker Hub, but you can use different ones as long as they are compatible.

# logo: https://trino.io/assets/images/trino-logo/trino-ko_tiny-alt.svg
# description: Trino is a distributed SQL query engine for big data analytics.

# Spec version, quoted so it is read as a string rather than a float.
version: '4.2'

# Account settings for this non-BYOA deployment; replace both placeholders
# with your own AWS account ID before applying the spec.
x-omnistrate-my-account:
  AwsAccountId: '<your account ID>'
  AwsBootstrapRoleAccountArn: 'arn:aws:iam::<your-account-id>:role/omnistrate-bootstrap-role'

# Named volumes mounted by the postgres, coordinator, worker and superset
# services.
volumes:
  hivedb: {}
  coordinatordb: {}
  workerdb: {}
  supersetdb: {}
services:
    postgres:
      # Postgres instance; its INIT hook creates the `hive` and `superset`
      # databases.
      image: postgres:9
      # NOTE(review): presumably hides this service from end users — confirm
      # against the Omnistrate documentation.
      x-omnistrate-mode-internal: true
      x-omnistrate-capabilities:
        networkType: INTERNAL
      x-omnistrate-api-params:
        - key: postgresqlPassword
          name: Password
          description: Default DB Password
          type: String
          required: true
          modifiable: false
          export: false
          defaultValue: ''
      environment:
        - SECURITY_CONTEXT_USER_ID=999
        - SECURITY_CONTEXT_GROUP_ID=999
        - POSTGRES_USER=postgres
        - POSTGRES_PASSWORD=$var.postgresqlPassword
        # Data directory is a subdirectory of the hivedb volume mount below.
        - PGDATA=/var/lib/postgresql/data/dbdata
      volumes:
        - hivedb:/var/lib/postgresql/data
      x-omnistrate-actionhooks:
        # Cluster-scoped INIT hook: connects to host `postgres` as user
        # `postgres` and issues one CREATE DATABASE statement per line.
        - scope: CLUSTER
          type: INIT
          commandTemplate: |
            PGPASSWORD={{ $var.postgresqlPassword }} psql -h postgres -U postgres -d postgres -c "CREATE DATABASE hive;"
            PGPASSWORD={{ $var.postgresqlPassword }} psql -h postgres -U postgres -d postgres -c "CREATE DATABASE superset;"
      ports:
        - '5432:5432'
      networks:
        ntrino:
          aliases:
            - postgres
      # Healthy once pg_isready succeeds for user `postgres`.
      healthcheck:
        test:
          - CMD-SHELL
          - pg_isready -U postgres
        interval: '20s'
        timeout: '20s'
        retries: 3

    hive:
      # Hive service; receives the Postgres password and the bucket access
      # role ARN through its environment.
      image: omnistrate/trino-hive:11.0
      container_name: hive
      x-omnistrate-mode-internal: true
      x-omnistrate-api-params:
        - key: postgresqlPassword
          name: Password
          description: Default DB Password
          type: String
          required: true
          modifiable: false
          export: false
        - key: bucketAccessRoleARN
          name: Bucket Access Role ARN
          description: Bucket Access Role ARN
          type: String
          required: true
          modifiable: true
          export: true
      environment:
        - POSTGRES_PASSWORD=$var.postgresqlPassword
        - BUCKET_ACCESS_ROLE_ARN=$var.bucketAccessRoleARN
      # Bind mounts relative to the directory holding this spec.
      volumes:
        - ./hive/scratch:/tmp/hive
        - ./hive/warehouse:/user/hive
        - ./hive/output:/opt/data/output
      ports:
        - '10000:10000'
        - '9083:9083'
      networks:
        ntrino:
          aliases:
            - hive

    coordinator:
      # Trino coordinator; HTTP traffic is reverse-proxied to port 8080.
      image: omnistrate/trino-coordinator:11.15
      container_name: coordinator
      x-omnistrate-mode-internal: true
      x-omnistrate-capabilities:
        httpReverseProxy:
          targetPort: 8080
        enableMultiZone: true
        enableEndpointPerReplica: false
      x-omnistrate-api-params:
        - key: bucketAccessRoleARN
          name: Bucket Access Role ARN
          description: Bucket Access Role ARN
          type: String
          required: true
          modifiable: true
          export: true
      environment:
        - BUCKET_ACCESS_ROLE_ARN=$var.bucketAccessRoleARN
      volumes:
        - coordinatordb:/data/trino
        - ./hive/output:/opt/data/output
      ports:
        - "8080:8080"
      expose:
        - "8080"
      networks:
        ntrino:
          aliases:
            - coordinator
      healthcheck:
        # -f (--fail) makes curl exit non-zero on HTTP 4xx/5xx responses, so
        # a coordinator that answers with an error is reported unhealthy
        # instead of passing the probe merely because the port is open.
        test: ["CMD-SHELL", "curl -fsS http://localhost:8080/ || exit 1"]
        interval: "20s"
        timeout: "20s"
        retries: 3

    worker:
      # Trino worker; shares the ./hive/output bind mount with the hive and
      # coordinator services.
      image: omnistrate/trino-worker:11.15
      x-omnistrate-mode-internal: true
      x-omnistrate-capabilities:
        httpReverseProxy:
          targetPort: 8080
        enableMultiZone: true
        enableEndpointPerReplica: false
      x-omnistrate-api-params:
        - key: bucketAccessRoleARN
          name: Bucket Access Role ARN
          description: Bucket Access Role ARN
          type: String
          required: true
          modifiable: true
          export: true
      environment:
        - BUCKET_ACCESS_ROLE_ARN=$var.bucketAccessRoleARN
      volumes:
        - workerdb:/data/trino
        - ./hive/output:/opt/data/output
      ports:
        - '8080:8080'
      networks:
        ntrino:
          aliases:
            - worker

    superset:
      # Superset service; HTTP traffic is reverse-proxied to port 8088.
      # NOTE(review): `latest` is a floating tag — consider pinning a version.
      image: omnistrate/superset:latest
      container_name: superset
      x-omnistrate-mode-internal: true
      x-omnistrate-capabilities:
        httpReverseProxy:
          targetPort: 8088
        enableMultiZone: true
        enableEndpointPerReplica: false
      environment:
        # Same path as the supersetdb volume mount below.
        - DATA_DIR=/opt/superset/data
        # NOTE(review): the secret key is hard-coded in the spec — consider
        # supplying it through an API parameter or a secret store instead.
        - 'SUPERSET_SECRET_KEY=HAjeudha2uahde*@Hau&@1'
        # NOTE(review): IDs of 0 presumably mean root — confirm this is needed.
        - SECURITY_CONTEXT_USER_ID=0
        - SECURITY_CONTEXT_GROUP_ID=0
        - SECURITY_CONTEXT_FS_GROUP=0
      volumes:
        - supersetdb:/opt/superset/data
      ports:
        - '8088:8088'
      networks:
        ntrino:
          aliases:
            - superset

    # trino-proxy:
    #   image: omnistrate/trino-proxy:latest
    #   container_name: "trino-proxy"
    #   networks:
    #     ntrino:
    #       aliases:
    #         - trino-proxy
    #   expose:
    #     - "8453"
    #     - "8001"
    #   ports:
    #     - "8453:8453"
    #     - "8001:8001"
    #   x-omnistrate-mode-internal: true

    Cluster:
      # Aggregating entry: uses the noop image, depends on every other
      # service, and is the only service with mode-internal set to false.
      image: omnistrate/noop
      x-omnistrate-mode-internal: false
      depends_on:
        - postgres
        - hive
        - coordinator
        - worker
        - superset
        # - trino-proxy
      x-omnistrate-api-params:
        - key: postgresqlPassword
          name: Password
          description: Default DB Password
          type: String
          required: true
          modifiable: false
          export: false
          # Linked to the same-named parameter on postgres and hive.
          parameterDependencyMap:
            postgres: postgresqlPassword
            hive: postgresqlPassword
        - key: bucketAccessRoleARN
          name: Bucket Access Role ARN
          description: Bucket Access Role ARN
          type: String
          required: true
          modifiable: true
          export: true
          # Linked to the same-named parameter on hive, coordinator and worker.
          parameterDependencyMap:
            hive: bucketAccessRoleARN
            coordinator: bucketAccessRoleARN
            worker: bucketAccessRoleARN

# Single network joined by the postgres, hive, coordinator, worker and
# superset services, each under an alias equal to its service name.
networks:
  ntrino: {}

Deploy in BYOA mode

To deploy in BYOA mode, you need to configure a customer account as follows:

version: '4.2'
# Customer account settings for BYOA mode; replace every placeholder.
x-omnistrate-byoa:
  # AWS account details
  AwsAccountId: '<your account ID>'
  AwsBootstrapRoleAccountArn: 'arn:aws:iam::<your-account-id>:role/omnistrate-bootstrap-role'
  # GCP project details
  GcpProjectId: '<your project ID>'
  GcpProjectNumber: '<your project number>'
  GcpServiceAccountEmail: '<your service account email>'

This replaces the x-omnistrate-my-account parameter, so the final YAML looks like this:

# Spec version, quoted so it is read as a string rather than a float.
version: '4.2'

# Customer account settings for BYOA mode; replace every placeholder with
# the customer's AWS and GCP details.
x-omnistrate-byoa:
  AwsAccountId: '<your account ID>'
  AwsBootstrapRoleAccountArn: 'arn:aws:iam::<your-account-id>:role/omnistrate-bootstrap-role'
  GcpProjectId: '<your project ID>'
  GcpProjectNumber: '<your project number>'
  GcpServiceAccountEmail: '<your service account email>'

# Named volumes mounted by the postgres, coordinator, worker and superset
# services.
volumes:
  hivedb: {}
  coordinatordb: {}
  workerdb: {}
  supersetdb: {}
services:
    postgres:
      # Postgres instance; its INIT hook creates the `hive` and `superset`
      # databases.
      image: postgres:9
      # NOTE(review): presumably hides this service from end users — confirm
      # against the Omnistrate documentation.
      x-omnistrate-mode-internal: true
      x-omnistrate-capabilities:
        networkType: INTERNAL
      x-omnistrate-api-params:
        - key: postgresqlPassword
          name: Password
          description: Default DB Password
          type: String
          required: true
          modifiable: false
          export: false
          defaultValue: ''
      environment:
        - SECURITY_CONTEXT_USER_ID=999
        - SECURITY_CONTEXT_GROUP_ID=999
        - POSTGRES_USER=postgres
        - POSTGRES_PASSWORD=$var.postgresqlPassword
        # Data directory is a subdirectory of the hivedb volume mount below.
        - PGDATA=/var/lib/postgresql/data/dbdata
      volumes:
        - hivedb:/var/lib/postgresql/data
      x-omnistrate-actionhooks:
        # Cluster-scoped INIT hook: connects to host `postgres` as user
        # `postgres` and issues one CREATE DATABASE statement per line.
        - scope: CLUSTER
          type: INIT
          commandTemplate: |
            PGPASSWORD={{ $var.postgresqlPassword }} psql -h postgres -U postgres -d postgres -c "CREATE DATABASE hive;"
            PGPASSWORD={{ $var.postgresqlPassword }} psql -h postgres -U postgres -d postgres -c "CREATE DATABASE superset;"
      ports:
        - '5432:5432'
      networks:
        ntrino:
          aliases:
            - postgres
      # Healthy once pg_isready succeeds for user `postgres`.
      healthcheck:
        test:
          - CMD-SHELL
          - pg_isready -U postgres
        interval: '20s'
        timeout: '20s'
        retries: 3

    hive:
      # Hive service; receives the Postgres password and the bucket access
      # role ARN through its environment.
      image: omnistrate/trino-hive:11.0
      container_name: hive
      x-omnistrate-mode-internal: true
      x-omnistrate-api-params:
        - key: postgresqlPassword
          name: Password
          description: Default DB Password
          type: String
          required: true
          modifiable: false
          export: false
        - key: bucketAccessRoleARN
          name: Bucket Access Role ARN
          description: Bucket Access Role ARN
          type: String
          required: true
          modifiable: true
          export: true
      environment:
        - POSTGRES_PASSWORD=$var.postgresqlPassword
        - BUCKET_ACCESS_ROLE_ARN=$var.bucketAccessRoleARN
      # Bind mounts relative to the directory holding this spec.
      volumes:
        - ./hive/scratch:/tmp/hive
        - ./hive/warehouse:/user/hive
        - ./hive/output:/opt/data/output
      ports:
        - '10000:10000'
        - '9083:9083'
      networks:
        ntrino:
          aliases:
            - hive

    coordinator:
      # Trino coordinator; HTTP traffic is reverse-proxied to port 8080.
      image: omnistrate/trino-coordinator:11.15
      container_name: coordinator
      x-omnistrate-mode-internal: true
      x-omnistrate-capabilities:
        httpReverseProxy:
          targetPort: 8080
        enableMultiZone: true
        enableEndpointPerReplica: false
      x-omnistrate-api-params:
        - key: bucketAccessRoleARN
          name: Bucket Access Role ARN
          description: Bucket Access Role ARN
          type: String
          required: true
          modifiable: true
          export: true
      environment:
        - BUCKET_ACCESS_ROLE_ARN=$var.bucketAccessRoleARN
      volumes:
        - coordinatordb:/data/trino
        - ./hive/output:/opt/data/output
      ports:
        - "8080:8080"
      expose:
        - "8080"
      networks:
        ntrino:
          aliases:
            - coordinator
      healthcheck:
        # -f (--fail) makes curl exit non-zero on HTTP 4xx/5xx responses, so
        # a coordinator that answers with an error is reported unhealthy
        # instead of passing the probe merely because the port is open.
        test: ["CMD-SHELL", "curl -fsS http://localhost:8080/ || exit 1"]
        interval: "20s"
        timeout: "20s"
        retries: 3

    worker:
      # Trino worker; shares the ./hive/output bind mount with the hive and
      # coordinator services.
      image: omnistrate/trino-worker:11.15
      x-omnistrate-mode-internal: true
      x-omnistrate-capabilities:
        httpReverseProxy:
          targetPort: 8080
        enableMultiZone: true
        enableEndpointPerReplica: false
      x-omnistrate-api-params:
        - key: bucketAccessRoleARN
          name: Bucket Access Role ARN
          description: Bucket Access Role ARN
          type: String
          required: true
          modifiable: true
          export: true
      environment:
        - BUCKET_ACCESS_ROLE_ARN=$var.bucketAccessRoleARN
      volumes:
        - workerdb:/data/trino
        - ./hive/output:/opt/data/output
      ports:
        - '8080:8080'
      networks:
        ntrino:
          aliases:
            - worker

    superset:
      # Superset service; HTTP traffic is reverse-proxied to port 8088.
      # NOTE(review): `latest` is a floating tag — consider pinning a version.
      image: omnistrate/superset:latest
      container_name: superset
      x-omnistrate-mode-internal: true
      x-omnistrate-capabilities:
        httpReverseProxy:
          targetPort: 8088
        enableMultiZone: true
        enableEndpointPerReplica: false
      environment:
        # Same path as the supersetdb volume mount below.
        - DATA_DIR=/opt/superset/data
        # NOTE(review): the secret key is hard-coded in the spec — consider
        # supplying it through an API parameter or a secret store instead.
        - 'SUPERSET_SECRET_KEY=HAjeudha2uahde*@Hau&@1'
        # NOTE(review): IDs of 0 presumably mean root — confirm this is needed.
        - SECURITY_CONTEXT_USER_ID=0
        - SECURITY_CONTEXT_GROUP_ID=0
        - SECURITY_CONTEXT_FS_GROUP=0
      volumes:
        - supersetdb:/opt/superset/data
      ports:
        - '8088:8088'
      networks:
        ntrino:
          aliases:
            - superset

    # trino-proxy:
    #   image: omnistrate/trino-proxy:latest
    #   container_name: "trino-proxy"
    #   networks:
    #     ntrino:
    #       aliases:
    #         - trino-proxy
    #   expose:
    #     - "8453"
    #     - "8001"
    #   ports:
    #     - "8453:8453"
    #     - "8001:8001"
    #   x-omnistrate-mode-internal: true

    Cluster:
      # Aggregating entry: uses the noop image, depends on every other
      # service, and is the only service with mode-internal set to false.
      image: omnistrate/noop
      x-omnistrate-mode-internal: false
      depends_on:
        - postgres
        - hive
        - coordinator
        - worker
        - superset
        # - trino-proxy
      x-omnistrate-api-params:
        - key: postgresqlPassword
          name: Password
          description: Default DB Password
          type: String
          required: true
          modifiable: false
          export: false
          # Linked to the same-named parameter on postgres and hive.
          parameterDependencyMap:
            postgres: postgresqlPassword
            hive: postgresqlPassword
        - key: bucketAccessRoleARN
          name: Bucket Access Role ARN
          description: Bucket Access Role ARN
          type: String
          required: true
          modifiable: true
          export: true
          # Linked to the same-named parameter on hive, coordinator and worker.
          parameterDependencyMap:
            hive: bucketAccessRoleARN
            coordinator: bucketAccessRoleARN
            worker: bucketAccessRoleARN

# Single network joined by the postgres, hive, coordinator, worker and
# superset services, each under an alias equal to its service name.
networks:
  ntrino: {}

You can set up your customers' accounts by following the BYOA guide.

That's it! Now you can deploy your Trino stack in BYOA mode and set it up for as many customers as you want.