Source code for swdata.people

"""
These are people provider and transformation classes.
"""

import logging
from dataclasses import dataclass, field
from typing import Dict, Generator, Iterator, Tuple

import dateutil.parser as dp
import petl as etl

from swdata.planets import EagerPlanetProvider, PlanetProvider
from swdata.reports import save_report, Report
from swdata.settings import SWAPI_URL

log = logging.getLogger(__name__)

from swdata.adapters import results_generator


@dataclass
class PeopleProvider:
    """Thin SWAPI adapter that streams people records.

    Attributes:
        url: Endpoint the records are read from. Defaults to ``SWAPI_URL``
            with ``people/`` appended.
    """

    url: str = f'{SWAPI_URL}people/'

    def get_people(self) -> Generator[Dict, None, None]:
        """Yield every item that ``results_generator`` produces for ``self.url``."""
        records = results_generator(self.url)
        yield from records
@dataclass
class PeopleTransformator:
    """
    Pipeline stage that enriches raw people records.

    The home world reference of every person is replaced with the value
    returned by ``planets.get_name`` and a ``date`` column derived from the
    ``edited`` timestamp is added.
    """

    # Columns (and their order) handed to petl when the table is built.
    # Unannotated, so @dataclass keeps it as a plain class attribute rather
    # than turning it into an init field.
    fields = (
        'name',
        'height',
        'mass',
        'hair_color',
        'skin_color',
        'eye_color',
        'birth_year',
        'gender',
        'homeworld',
        'edited',
        'url',
    )

    planets: PlanetProvider = field(default_factory=EagerPlanetProvider)
    # planets: PlanetProvider = field(default_factory=LazyPlanetProvider)

    def transform_people(self, people: Iterator[Dict]) -> Iterator[Tuple]:
        """
        Lazily transform raw people dicts into report rows.

        Steps, in order: build a petl table restricted to ``self.fields``,
        convert ``homeworld`` through ``self.planets.get_name``, add a
        ``date`` column holding the ``edited`` value formatted as
        ``YYYY-MM-DD``, drop the ``edited`` and ``url`` columns, and attach
        petl progress reporting (batch size 5).

        :param people: iterable of raw people records (dicts).
        :return: iterator over the rows of the resulting petl table.
            NOTE(review): petl tables normally emit the header row first --
            confirm that ``save_report`` expects it.
        """

        def _edited_date(row):
            # Parse the 'edited' timestamp and keep only the calendar date.
            return dp.parse(row['edited']).strftime("%Y-%m-%d")

        table = etl.fromdicts(people, self.fields)
        table = table.convert('homeworld', self.planets.get_name)
        table = table.addfield('date', _edited_date)
        table = table.cutout('edited', 'url')
        table = table.progress(5)

        yield from table
@dataclass
class PeopleSaver:
    """Aggregate that wires a people provider to a transformer and saves the result as a report."""

    transformer: PeopleTransformator = field(default_factory=PeopleTransformator)
    provider: PeopleProvider = field(default_factory=PeopleProvider)

    def save(self, directory: str) -> Report:
        """
        Fetch people, transform them and persist the outcome.

        :param directory: passed as the first argument to ``save_report``.
        :return: whatever ``save_report`` returns for the transformed rows.
        """
        source_rows = self.provider.get_people()

        # Design note: control over planets is implicitly delegated from
        # this aggregate to PeopleTransformator. At the moment the
        # PlanetProvider implementations manage their own caches. Should
        # that change -- e.g. if the cache had to be initialised up front --
        # that control ought to be exercised here (or, better still, in a
        # dependency injection container setup) rather than inside
        # PeopleTransformator.
        return save_report(
            directory,
            self.transformer.transform_people(source_rows),
        )