Update opendata schema

This commit is contained in:
Dryusdan 2024-03-01 23:36:04 +01:00
parent ded8c61183
commit 337385a814
6 changed files with 805 additions and 379 deletions

973
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -6,15 +6,16 @@ authors = ["Dryusdan"]
license = "GPL-3"
[tool.poetry.dependencies]
python = "^3.9"
python = "^3.11"
SQLAlchemy = "^2.0.21"
PyYAML = "^6.0.1"
psycopg2-binary = "^2.9.9"
requests = "^2.31.0"
pydantic = "^2.4.2"
GeoAlchemy2 = "^0.14.1"
typer = "^0.9.0"
pendulum = "^2.1.2"
SQLAlchemy = "^1.4.37"
PyYAML = "^6.0"
psycopg2-binary = "^2.9.3"
requests = "^2.28.0"
pydantic = "^1.9.1"
GeoAlchemy2 = "^0.12.1"
typer = "^0.4.1"
rich = "^13.7.1"
[tool.poetry.dev-dependencies]
black = "^22.3.0"

View file

@ -5,6 +5,7 @@ import typer
from logger import Logger
from parser import parseBoucles, parseRecord
from settings import Settings
from rich import print
from sql import crud, models, database
settings = Settings()
@ -32,6 +33,7 @@ def populate_boucles():
)
if r.status_code == requests.codes.ok:
data = r.json()
print(data)
if data["records"] != []:
log.info("Records is not empty, increase start from 10")
start += 10
@ -61,7 +63,7 @@ def prepopulate_today():
process = True
while process:
r = requests.get(
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b"
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b&order_by=dateformat%20desc"
)
if r.status_code == requests.codes.ok:
data = r.json()
@ -75,7 +77,7 @@ def prepopulate_today():
)
if record_date == today:
log.info("Date is good")
parseRecord(record)
parseRecord(record["fields"])
else:
log.info("Stop processing")
process = False
@ -101,7 +103,7 @@ def populate_day():
process = True
while process:
r = requests.get(
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b"
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b&order_by=dateformat%20desc"
)
if r.status_code == requests.codes.ok:
data = r.json()
@ -117,7 +119,7 @@ def populate_day():
log.info("Date is not complete, ignore it")
elif record_date == good_day:
log.info("Date is good")
parseRecord(record)
parseRecord(record["fields"])
else:
log.info("Stop processing")
process = False
@ -139,7 +141,7 @@ def populate_all(
with open(f"{file}", "r") as json_file:
data = json.load(json_file)
for record in data:
log.debug(f'record date is {record["fields"]["dateformat"]}')
log.debug(f'record date is {record["dateformat"]}')
parseRecord(record, force)
@ -155,7 +157,7 @@ def consolidate_week():
process = True
while process:
r = requests.get(
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b"
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b&order_by=dateformat%20desc"
)
if r.status_code == requests.codes.ok:
data = r.json()
@ -171,7 +173,51 @@ def consolidate_week():
log.info("Date is not complete, ignore it")
elif good_day_end <= record_date <= good_day_start:
log.info("Date is good")
parseRecord(record)
parseRecord(record["fields"])
else:
log.info("Stop processing")
process = False
break
start += 10
else:
log.debug("Records is empty, stop it")
process = False
else:
log.error(f"API return a bad status code : {r.status_code}")
process = False
@app.command()
def consolidate_month():
today = pendulum.today("UTC")
yesterday = pendulum.yesterday("UTC")
current_day = today.day
last_month = pendulum.now("UTC").subtract(days=current_day)
good_day_end = pendulum.datetime(last_month.year, last_month.month, 1, tz='UTC')
good_day_start = pendulum.datetime(last_month.year, last_month.month, last_month.days_in_month, tz='UTC')
limit = 10
start = 0
process = True
log.debug(f"Process date between {good_day_start} and {good_day_end}")
while process:
r = requests.get(
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b&order_by=dateformat%20desc"
)
if r.status_code == requests.codes.ok:
data = r.json()
if data["records"] != []:
for record in data["records"]:
log.debug(
f'record date is {record["fields"]["dateformat"]}'
)
record_date = pendulum.parse(
record["fields"]["dateformat"]
)
if record_date > good_day_start:
log.debug(f"{record_date} is more recent than {good_day_end}")
elif good_day_end <= record_date <= good_day_start:
log.info("Date is good")
parseRecord(record["fields"])
else:
log.info("Stop processing")
process = False
@ -187,3 +233,6 @@ def consolidate_week():
if __name__ == "__main__":
app()
if __name__ == "__main__":
app()

View file

@ -31,89 +31,56 @@ def parseBoucles(boucles):
def parseRecord(record, force=False):
if not crud.is_boucle_id_exist(record["fields"]["boucle_num"]):
log.warning(f'Boucle {record["fields"]["boucle_num"]} not exist')
if not crud.is_boucle_id_exist(record["boucle_num"]):
log.warning(f'Boucle {record["boucle_num"]} not exist')
log.warning("Add this boucle in temp table")
unaccounted_table = True
else:
unaccounted_table = False
for hour in [
"00",
"01",
"02",
"03",
"04",
"05",
"06",
"07",
"08",
"09",
"10",
"11",
"12",
"13",
"14",
"15",
"16",
"17",
"18",
"19",
"20",
"21",
"22",
"23",
]:
if hour in record["fields"]:
if record["fields"]["vacances_zone_b"] == "Hors Vacances":
holiday = False
if record["vacances_zone_b"] == "Hors Vacances":
holiday = False
else:
holiday = True
date = pendulum.parse(record["dateformat"])
if not crud.is_comptage_exist(
date, record["boucle_num"], unaccounted_table
):
comptage = models.Comptage(
id_boucle=record["boucle_num"],
datetime=date,
count=record["total"],
holiday=holiday,
week_day=record["jour_de_la_semaine"],
)
crud.create_comptage(comptage, unaccounted_table)
else:
db_comptage = crud.get_comptage_by_date_and_boucle(
date, record["boucle_num"], unaccounted_table
)
log.error(
f"Entry already exist ({record['results']['boucle_num']}, {date})"
)
log.debug(
f'Check if {db_comptage.count} != {record["total"]}'
)
if db_comptage.count != record["total"] or force:
if force:
log.info(f"Update with force {db_comptage.id}")
else:
holiday = True
date = pendulum.parse(record["fields"]["dateformat"]).at(
int(hour), 0, 0
log.warning(
f'Entry {db_comptage.id} have a different count (DB : {db_comptage.count}, data {record["total"]}'
)
comptage = models.Comptage(
id=db_comptage.id,
id_boucle=db_comptage.id_boucle,
datetime=db_comptage.datetime,
count=record["total"],
holiday=holiday,
week_day=record["jour_de_la_semaine"],
)
if not crud.is_comptage_exist(
date, record["fields"]["boucle_num"], unaccounted_table
):
comptage = models.Comptage(
id_boucle=record["fields"]["boucle_num"],
datetime=date,
count=record["fields"][hour],
holiday=holiday,
week_day=record["fields"]["jour_de_la_semaine"],
)
crud.create_comptage(comptage, unaccounted_table)
else:
db_comptage = crud.get_comptage_by_date_and_boucle(
date, record["fields"]["boucle_num"], unaccounted_table
)
log.error(
f"Entry already exist ({record['fields']['boucle_num']}, {date})"
)
log.debug(
f'Check if {db_comptage.count} != {record["fields"][hour]}'
)
if db_comptage.count != record["fields"][hour] or force:
if force:
log.info(f"Update with force {db_comptage.id}")
else:
log.warning(
f'Entry {db_comptage.id} have a different count (DB : {db_comptage.count}, data {record["field"][hour]}'
)
comptage = models.Comptage(
id=db_comptage.id,
id_boucle=db_comptage.id_boucle,
datetime=db_comptage.datetime,
count=record["fields"][hour],
holiday=holiday,
week_day=record["fields"]["jour_de_la_semaine"],
)
crud.update_comptage(comptage, unaccounted_table)
else:
log.info(f"Skip {db_comptage.id}, already exist")
crud.update_comptage(comptage, unaccounted_table)
else:
log.info(f"Skip {db_comptage.id}, already exist")
# else:
# log.error(f'Boucle {record["fields"]["boucle_num"]} not exist')
else:
log.error(
f'{hour} is not present for {record["fields"]["boucle_num"]}'
)

View file

@ -2,7 +2,7 @@ from sqlalchemy import (
Column,
Integer,
String,
DateTime,
Date,
ForeignKey,
func,
Boolean,
@ -26,7 +26,7 @@ class Comptage(Base):
id = Column(Integer, primary_key=True)
id_boucle = Column(Integer, ForeignKey("boucles.id"))
datetime = Column("date", DateTime(timezone=True), default=func.now())
datetime = Column("date", Date(), default=func.now())
week_day = Column(Integer)
holiday = Column(Boolean)
count = Column(Integer)
@ -37,7 +37,7 @@ class unaccounted_Comptage(Base):
id = Column(Integer, primary_key=True)
id_boucle = Column(Integer)
datetime = Column("date", DateTime(timezone=True), default=func.now())
datetime = Column("date", Date(), default=func.now())
week_day = Column(Integer)
holiday = Column(Boolean)
count = Column(Integer)

View file

@ -12,8 +12,8 @@ class ComptageBase(BaseModel):
id_boucle: int
datetime: datetime
count: int
week_day = int
holiday = bool
week_day: int
holiday: bool
class ComptageCreate(ComptageBase):