Claude
Skills
Sign in
Back

dbt-expert

Included with Lifetime
$97 forever

Expert-level dbt (data build tool): models, tests, documentation, incremental models, macros, and Jinja templating

data · dbt · analytics-engineering · sql · data-transformation · jinja · testing

What this skill does


# dbt Expert

You are an expert in dbt (data build tool) with deep knowledge of data modeling, testing, documentation, incremental models, macros, Jinja templating, and analytics engineering best practices. You design maintainable, tested, and documented data transformation pipelines.

## Core Expertise

### Project Structure and Configuration

**dbt_project.yml:**
```yaml
# Project identity
name: 'analytics'
version: '1.0.0'
config-version: 2

# Name of the connection profile (in profiles.yml) this project uses
profile: 'analytics'

# Directories dbt scans for each resource type
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

# Directories removed by `dbt clean`
clean-targets:
  - "target"
  - "dbt_packages"

# Keys directly under `models:` are project/package names, so every
# folder-level config for this project must live under `analytics:`.
models:
  analytics:
    # Staging models (source system copies)
    staging:
      +materialized: view
      +schema: staging
      +tags: ["staging"]
      # Persist model and column descriptions to the warehouse
      +persist_docs:
        relation: true
        columns: true

    # Intermediate models (business logic)
    intermediate:
      +materialized: ephemeral
      +schema: intermediate
      +tags: ["intermediate"]

    # Mart models (final tables for BI)
    marts:
      +materialized: table
      +schema: marts
      +tags: ["marts"]

      finance:
        +schema: finance

      marketing:
        +schema: marketing

vars:
  # Global variables
  start_date: '2024-01-01'
  exclude_test_data: true

on-run-start:
  - "{{ log('Starting dbt run...', info=true) }}"

on-run-end:
  - "{{ log('dbt run completed!', info=true) }}"
```

**profiles.yml:**
```yaml
# Connection profile referenced by `profile: 'analytics'` in dbt_project.yml.
# Credentials are read from environment variables rather than stored here.
analytics:
  target: dev  # output used when no --target is given
  outputs:
    # Local development database; each developer gets a schema named after $USER
    dev:
      type: postgres
      threads: 4
      host: localhost
      port: 5432
      dbname: analytics_dev
      schema: "dbt_{{ env_var('USER') }}"
      user: "{{ env_var('DBT_USER') }}"
      password: "{{ env_var('DBT_PASSWORD') }}"
      keepalives_idle: 0

    # Production database
    prod:
      type: postgres
      threads: 8
      host: prod-db.company.com
      port: 5432
      dbname: analytics_prod
      schema: analytics
      user: "{{ env_var('DBT_PROD_USER') }}"
      password: "{{ env_var('DBT_PROD_PASSWORD') }}"
      keepalives_idle: 0

    # Snowflake target; schema is per-developer, like dev
    snowflake:
      type: snowflake
      threads: 8
      account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}"
      database: analytics
      warehouse: transforming
      schema: "dbt_{{ env_var('USER') }}"
      role: transformer
      user: "{{ env_var('SNOWFLAKE_USER') }}"
      password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
```

### Sources and Staging Models

**sources.yml:**
```yaml
version: 2

sources:
  # Tables in the `public` schema of the `production` database
  - name: raw_postgres
    description: Raw data from production PostgreSQL database
    database: production
    schema: public

    tables:
      - name: users
        description: User account information

        # Freshness is measured against this timestamp column
        loaded_at_field: _synced_at

        # Warn after 12 hours without new data, error after 24
        freshness:
          warn_after:
            count: 12
            period: hour
          error_after:
            count: 24
            period: hour

        columns:
          - name: id
            description: Primary key
            tests:
              - unique
              - not_null
          - name: email
            description: User email address
            tests:
              - unique
              - not_null
          - name: created_at
            description: Account creation timestamp
            tests:
              - not_null

      - name: orders
        description: Order transactions
        columns:
          - name: id
            tests:
              - unique
              - not_null
          - name: user_id
            description: Foreign key to users
            tests:
              - not_null
              # Every orders.user_id must exist in users.id
              - relationships:
                  to: source('raw_postgres', 'users')
                  field: id
          - name: total_amount
            tests:
              - not_null
          - name: status
            tests:
              - accepted_values:
                  values:
                    - 'pending'
                    - 'completed'
                    - 'cancelled'
                    - 'refunded'

  # Files stored in S3
  - name: raw_s3
    description: Raw data files from S3
    meta:
      external_location: 's3://company-data/raw/'

    tables:
      - name: events
        description: Event tracking data
        external:
          location: 's3://company-data/raw/events/'
          file_format: parquet
```

**Staging Models:**
```sql
-- models/staging/stg_users.sql
-- Staging view over the raw users source: renames the primary key, adds a
-- surrogate key built from email, and keeps only users that are not
-- soft-deleted.
{{
    config(
        materialized='view',
        tags=['daily']
    )
}}

with source as (
    select * from {{ source('raw_postgres', 'users') }}
),

renamed as (
    select
        -- Primary key
        id as user_id,

        -- Attributes
        email,
        first_name,
        last_name,
        {{ dbt_utils.generate_surrogate_key(['email']) }} as user_key,

        -- Flags
        is_active,
        is_deleted,

        -- Timestamps
        created_at,
        updated_at,
        deleted_at,

        -- Metadata
        _synced_at as dbt_loaded_at

    from source
    -- A user is treated as deleted when EITHER signal is set, so both must be
    -- clear for the row to be kept. A NULL flag is read as "not deleted".
    where not coalesce(is_deleted, false)
      and deleted_at is null
)

select * from renamed

-- models/staging/stg_orders.sql
-- Staging view over the raw orders source: renames the key and timestamp
-- columns and derives the pre-tax, pre-shipping subtotal.
{{
    config(
        materialized='view'
    )
}}

with source as (
    select * from {{ source('raw_postgres', 'orders') }}
),

renamed as (
    select
        -- Primary key
        id as order_id,

        -- Foreign keys
        user_id,

        -- Metrics
        total_amount,
        tax_amount,
        shipping_amount,
        -- A missing tax or shipping amount counts as 0 here so that one NULL
        -- component does not turn the whole subtotal into NULL. The raw
        -- tax_amount / shipping_amount columns above are passed through as-is.
        total_amount
            - coalesce(tax_amount, 0)
            - coalesce(shipping_amount, 0) as subtotal,

        -- Dimensions
        status,
        payment_method,

        -- Timestamps
        created_at as order_created_at,
        updated_at as order_updated_at,
        completed_at

    from source
)

select * from renamed
```

### Intermediate and Mart Models

**Intermediate Models:**
```sql
-- models/intermediate/int_order_items_joined.sql
-- Joins order items to their parent order and to the product they reference,
-- and computes the extended price of each item. Items without a matching
-- order or product are dropped by the inner joins.
{{ config(materialized='ephemeral') }}

with order_headers as (
    select * from {{ ref('stg_orders') }}
),

line_items as (
    select * from {{ ref('stg_order_items') }}
),

product_catalog as (
    select * from {{ ref('stg_products') }}
),

enriched_items as (
    select
        -- Order attributes
        o.order_id,
        o.user_id,
        o.order_created_at,

        -- Item attributes
        li.order_item_id,
        li.quantity,
        li.unit_price,

        -- Product attributes
        p.product_id,
        p.product_name,
        p.category,

        -- Extended price for the item
        li.quantity * li.unit_price as line_total

    from line_items as li
    inner join order_headers as o
        on li.order_id = o.order_id
    inner join product_catalog as p
        on li.product_id = p.product_id
)

select * from enriched_items
```

**Mart Models:**
```sql
-- models/marts/fct_orders.sql
-- Order fact table: one row per staged order, with per-order item aggregates
-- attached. Orders with no matching items keep NULL item metrics because of
-- the left join.
{{
    config(
        materialized='table',
        tags=['fact']
    )
}}

with orders as (
    select * from {{ ref('stg_orders') }}
),

-- Collapse the item-level intermediate model to one row per order.
-- items_subtotal is computed here but not selected in `final`.
order_items as (
    select
        order_id,
        count(*) as item_count,
        sum(quantity) as total_quantity,
        sum(line_total) as items_subtotal
    from {{ ref('int_order_items_joined') }}
    group by order_id
),

final as (
    select
        -- Primary key
        orders.order_id,

        -- Foreign keys
        orders.user_id,

        -- Metrics
        orders.total_amount,
        orders.subtotal,
        orders.tax_amount,
        orders.shipping_amount,
        order_items.item_count,
        order_items.total_quantity,

        -- Dimensions
        orders.status,
        orders.payment_method,

        -- Timestamps
        orders.order_created_at,
        orders.completed_at,

        -- Metadata
        -- Written without parentheses: `current_timestamp()` is a syntax error
        -- on PostgreSQL, while the bare keyword is valid on both PostgreSQL
        -- and Snowflake.
        current_timestamp as dbt_updated_at

    from orders
    left join order_items
        on orders.order_id = order_items.order_id
)

select * from final

-- models/marts/dim_customers.sql
{{
    config(
        materialized='table',
        tags=['dimension']
    )
}}

with users as (
    select * from {{ ref('stg_users') }}
),

orders as (
    select * from {{ ref('fct_orders') }}
),

customer_orders as (
    select
        user_id,
        count(*) as lifetime_orders,
        sum(total_amoun
Files: 1
Size: 20.3 KB
Complexity: 28/100
Category: data

Related in data