academic_observatory_workflows.openalex_telescope.release

Classes

OpenAlexEntity

Manifest

Meta

ManifestEntry

MergedId

Functions

s3_uri_parts(s3_uri: str) → Tuple[str, str]

Extracts the S3 bucket name and object key from the given S3 URI.

Module Contents

class academic_observatory_workflows.openalex_telescope.release.OpenAlexEntity(*, dag_id: str, run_id: str, cloud_workspace: observatory_platform.airflow.workflow.CloudWorkspace, entity_name: str, bq_dataset_id: str, schema_folder: str, snapshot_date: pendulum.DateTime, manifest: Manifest, merged_ids: List[MergedId], is_first_run: bool)[source]

Bases: observatory_platform.airflow.release.SnapshotRelease

cloud_workspace[source]
entity_name[source]
bq_dataset_id[source]
schema_folder[source]
manifest[source]
merged_ids[source]
is_first_run[source]
transfer_manifest_uri[source]
gcs_openalex_data_uri = 'gs://Uninferable/Uninferable/'[source]
log_path[source]
property table_description[source]
property schema_file_path[source]
property generated_schema_path[source]
property data_uri[source]
property bq_table_id[source]
property entries[source]
static from_dict(dict_: dict) → OpenAlexEntity[source]
to_dict() → dict[source]
academic_observatory_workflows.openalex_telescope.release.s3_uri_parts(s3_uri: str) → Tuple[str, str][source]

Extracts the S3 bucket name and object key from the given S3 URI.

Parameters:

s3_uri – str, S3 URI in format s3://mybucketname/path/to/object

Returns:

tuple, (bucket_name, object_key)

class academic_observatory_workflows.openalex_telescope.release.Manifest(entries: List[ManifestEntry], meta: Meta)[source]
entries[source]
meta[source]
__eq__(other)[source]
static from_dict(dict_: Dict) → Manifest[source]
to_dict() → Dict[source]
class academic_observatory_workflows.openalex_telescope.release.Meta(content_length, record_count)[source]
content_length[source]
record_count[source]
__eq__(other)[source]
static from_dict(dict_: Dict) → Meta[source]
to_dict() → Dict[source]
class academic_observatory_workflows.openalex_telescope.release.ManifestEntry(url: str, meta: Meta)[source]
meta[source]
__eq__(other)[source]
property object_key[source]
property updated_date: pendulum.DateTime[source]
property file_name[source]
static from_dict(dict_: Dict) → ManifestEntry[source]
to_dict() → Dict[source]
class academic_observatory_workflows.openalex_telescope.release.MergedId(url: str, content_length: int)[source]
url[source]
content_length[source]
__eq__(other)[source]
property object_key[source]
property updated_date: pendulum.DateTime[source]
property file_name[source]
static from_dict(dict_: Dict) → MergedId[source]
to_dict() → Dict[source]