academic_observatory_workflows.openalex_telescope.telescope

Classes

DagParams

Functions

create_dag(→ airflow.DAG)

Module Contents

class academic_observatory_workflows.openalex_telescope.telescope.DagParams(*, dag_id: str, cloud_workspace: observatory_platform.airflow.workflow.CloudWorkspace, bq_dataset_id: str = 'openalex', api_bq_dataset_id: str = 'dataset_api', entity_names: List[str] = None, schema_folder: str = project_path('openalex_telescope', 'schema'), dataset_description: str = 'The OpenAlex dataset: https://docs.openalex.org/', non_concurrent_table_expiry_days: int = 62, n_transfer_trys: int = 3, primary_key: str = 'id', aws_conn_id: str = 'aws_openalex', aws_openalex_bucket: str = 'openalex', slack_conn_id: str | None = AirflowConns.SLACK, start_date: pendulum.DateTime = pendulum.datetime(2021, 12, 1), schedule: str = '@weekly', max_active_runs: int = 1, retries: int = 3, gke_image: str = DEFAULT_GKE_IMAGE, gke_namespace: str = 'coki-astro', gke_volume_path: str = '/data', gke_resource_map: dict = None, gke_volume_map: dict = None, gke_conn_id: str = 'gke_cluster', **kwargs)[source]
dag_id[source]
cloud_workspace[source]
bq_dataset_id = 'openalex'[source]
api_bq_dataset_id = 'dataset_api'[source]
entity_names = None[source]
schema_folder[source]
dataset_description = 'The OpenAlex dataset: https://docs.openalex.org/'[source]
non_concurrent_table_expiry_days = 62[source]
n_transfer_trys = 3[source]
primary_key = 'id'[source]
aws_conn_id = 'aws_openalex'[source]
aws_openalex_bucket = 'openalex'[source]
slack_conn_id[source]
start_date[source]
schedule = '@weekly'[source]
max_active_runs = 1[source]
retries = 3[source]
gke_conn_id = 'gke_cluster'[source]
gke_params_map[source]
academic_observatory_workflows.openalex_telescope.telescope.create_dag(dag_params: DagParams) → airflow.DAG[source]