lineage_backend_demo

Lineage Backend

An example DAG demonstrating the usage of DataHub’s Airflow lineage backend.

Data Management & Governance


Providers:

Run this DAG

1. Install the Astronomer CLI (skip this step if you already have the CLI).

2. Initiate the project:

3. Copy and paste the code below into a file in the `dags` directory.

4. Add the following to your requirements.txt file:

5. Run the DAG:

"""Lineage Backend
An example DAG demonstrating the usage of DataHub's Airflow lineage backend.
"""
from datetime import timedelta

from airflow import DAG
from airflow.utils.dates import days_ago

# Prefer the `airflow.operators.bash` module path; when that module is not
# present in the installed Airflow (presumably an older release), fall back
# to `airflow.operators.bash_operator`, which exposes the same class name.
try:
    from airflow.operators.bash import BashOperator
except ModuleNotFoundError:
    from airflow.operators.bash_operator import BashOperator

from datahub_provider.entities import Dataset
# Shared defaults for the demo DAG; handed to the DAG constructor through
# its `default_args=` keyword.
default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    "email": ["jdoe@example.com"],
    "email_on_failure": False,
    # Five-minute limit, expressed as a timedelta.
    "execution_timeout": timedelta(minutes=5),
}
# Demo DAG: scheduled once per day, containing a single Bash task. The task's
# `inlets` / `outlets` declare the datasets it consumes and produces, which is
# the information a lineage backend picks up from the task definition.
with DAG(
    "datahub_lineage_backend_demo",
    default_args=default_args,
    description="An example DAG demonstrating the usage of DataHub's Airflow lineage backend.",
    schedule_interval=timedelta(days=1),
    start_date=days_ago(2),
    tags=["example_tag"],
    catchup=False,
) as dag:
    task1 = BashOperator(
        task_id="run_data_task",
        dag=dag,
        # Placeholder command; it only echoes a message.
        bash_command="echo 'This is where you might run your data tooling.'",
        # Datasets declared as inputs of this task.
        inlets={
            "datasets": [
                Dataset("snowflake", "mydb.schema.tableA"),
                Dataset("snowflake", "mydb.schema.tableB"),
            ],
        },
        # Dataset declared as the output of this task.
        outlets={"datasets": [Dataset("snowflake", "mydb.schema.tableC")]},
    )