Update opendata schema
This commit is contained in:
parent
ded8c61183
commit
337385a814
973
poetry.lock
generated
973
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -6,15 +6,16 @@ authors = ["Dryusdan"]
|
|||
license = "GPL-3"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
python = "^3.11"
|
||||
SQLAlchemy = "^2.0.21"
|
||||
PyYAML = "^6.0.1"
|
||||
psycopg2-binary = "^2.9.9"
|
||||
requests = "^2.31.0"
|
||||
pydantic = "^2.4.2"
|
||||
GeoAlchemy2 = "^0.14.1"
|
||||
typer = "^0.9.0"
|
||||
pendulum = "^2.1.2"
|
||||
SQLAlchemy = "^1.4.37"
|
||||
PyYAML = "^6.0"
|
||||
psycopg2-binary = "^2.9.3"
|
||||
requests = "^2.28.0"
|
||||
pydantic = "^1.9.1"
|
||||
GeoAlchemy2 = "^0.12.1"
|
||||
typer = "^0.4.1"
|
||||
rich = "^13.7.1"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
black = "^22.3.0"
|
||||
|
|
63
src/main.py
63
src/main.py
|
@ -5,6 +5,7 @@ import typer
|
|||
from logger import Logger
|
||||
from parser import parseBoucles, parseRecord
|
||||
from settings import Settings
|
||||
from rich import print
|
||||
from sql import crud, models, database
|
||||
|
||||
settings = Settings()
|
||||
|
@ -32,6 +33,7 @@ def populate_boucles():
|
|||
)
|
||||
if r.status_code == requests.codes.ok:
|
||||
data = r.json()
|
||||
print(data)
|
||||
if data["records"] != []:
|
||||
log.info("Records is not empty, increase start from 10")
|
||||
start += 10
|
||||
|
@ -61,7 +63,7 @@ def prepopulate_today():
|
|||
process = True
|
||||
while process:
|
||||
r = requests.get(
|
||||
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b"
|
||||
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b&order_by=dateformat%20desc"
|
||||
)
|
||||
if r.status_code == requests.codes.ok:
|
||||
data = r.json()
|
||||
|
@ -75,7 +77,7 @@ def prepopulate_today():
|
|||
)
|
||||
if record_date == today:
|
||||
log.info("Date is good")
|
||||
parseRecord(record)
|
||||
parseRecord(record["fields"])
|
||||
else:
|
||||
log.info("Stop processing")
|
||||
process = False
|
||||
|
@ -101,7 +103,7 @@ def populate_day():
|
|||
process = True
|
||||
while process:
|
||||
r = requests.get(
|
||||
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b"
|
||||
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b&order_by=dateformat%20desc"
|
||||
)
|
||||
if r.status_code == requests.codes.ok:
|
||||
data = r.json()
|
||||
|
@ -117,7 +119,7 @@ def populate_day():
|
|||
log.info("Date is not complete, ignore it")
|
||||
elif record_date == good_day:
|
||||
log.info("Date is good")
|
||||
parseRecord(record)
|
||||
parseRecord(record["fields"])
|
||||
else:
|
||||
log.info("Stop processing")
|
||||
process = False
|
||||
|
@ -139,7 +141,7 @@ def populate_all(
|
|||
with open(f"{file}", "r") as json_file:
|
||||
data = json.load(json_file)
|
||||
for record in data:
|
||||
log.debug(f'record date is {record["fields"]["dateformat"]}')
|
||||
log.debug(f'record date is {record["dateformat"]}')
|
||||
parseRecord(record, force)
|
||||
|
||||
|
||||
|
@ -155,7 +157,7 @@ def consolidate_week():
|
|||
process = True
|
||||
while process:
|
||||
r = requests.get(
|
||||
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b"
|
||||
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b&order_by=dateformat%20desc"
|
||||
)
|
||||
if r.status_code == requests.codes.ok:
|
||||
data = r.json()
|
||||
|
@ -171,7 +173,51 @@ def consolidate_week():
|
|||
log.info("Date is not complete, ignore it")
|
||||
elif good_day_end <= record_date <= good_day_start:
|
||||
log.info("Date is good")
|
||||
parseRecord(record)
|
||||
parseRecord(record["fields"])
|
||||
else:
|
||||
log.info("Stop processing")
|
||||
process = False
|
||||
break
|
||||
start += 10
|
||||
else:
|
||||
log.debug("Records is empty, stop it")
|
||||
process = False
|
||||
else:
|
||||
log.error(f"API return a bad status code : {r.status_code}")
|
||||
process = False
|
||||
|
||||
@app.command()
|
||||
def consolidate_month():
|
||||
today = pendulum.today("UTC")
|
||||
yesterday = pendulum.yesterday("UTC")
|
||||
current_day = today.day
|
||||
last_month = pendulum.now("UTC").subtract(days=current_day)
|
||||
good_day_end = pendulum.datetime(last_month.year, last_month.month, 1, tz='UTC')
|
||||
good_day_start = pendulum.datetime(last_month.year, last_month.month, last_month.days_in_month, tz='UTC')
|
||||
|
||||
limit = 10
|
||||
start = 0
|
||||
process = True
|
||||
log.debug(f"Process date between {good_day_start} and {good_day_end}")
|
||||
while process:
|
||||
r = requests.get(
|
||||
f"https://data.nantesmetropole.fr/api/records/1.0/search/?dataset=244400404_comptages-velo-nantes-metropole&q=&rows={limit}&start={start}&sort=jour&facet=boucle_num&facet=libelle&facet=jour&facet=probabilite_presence_anomalie&facet=jour_de_la_semaine&facet=boucle_libelle&facet=vacances_zone_b&order_by=dateformat%20desc"
|
||||
)
|
||||
if r.status_code == requests.codes.ok:
|
||||
data = r.json()
|
||||
if data["records"] != []:
|
||||
for record in data["records"]:
|
||||
log.debug(
|
||||
f'record date is {record["fields"]["dateformat"]}'
|
||||
)
|
||||
record_date = pendulum.parse(
|
||||
record["fields"]["dateformat"]
|
||||
)
|
||||
if record_date > good_day_start:
|
||||
# The guard above compares against good_day_start, so report that bound.
log.debug(f"{record_date} is more recent than {good_day_start}")
|
||||
elif good_day_end <= record_date <= good_day_start:
|
||||
log.info("Date is good")
|
||||
parseRecord(record["fields"])
|
||||
else:
|
||||
log.info("Stop processing")
|
||||
process = False
|
||||
|
@ -187,3 +233,6 @@ def consolidate_week():
|
|||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
|
|
121
src/parser.py
121
src/parser.py
|
@ -31,89 +31,56 @@ def parseBoucles(boucles):
|
|||
|
||||
|
||||
def parseRecord(record, force=False):
|
||||
if not crud.is_boucle_id_exist(record["fields"]["boucle_num"]):
|
||||
log.warning(f'Boucle {record["fields"]["boucle_num"]} not exist')
|
||||
if not crud.is_boucle_id_exist(record["boucle_num"]):
|
||||
log.warning(f'Boucle {record["boucle_num"]} not exist')
|
||||
log.warning("Add this boucle in temp table")
|
||||
unaccounted_table = True
|
||||
else:
|
||||
unaccounted_table = False
|
||||
|
||||
for hour in [
|
||||
"00",
|
||||
"01",
|
||||
"02",
|
||||
"03",
|
||||
"04",
|
||||
"05",
|
||||
"06",
|
||||
"07",
|
||||
"08",
|
||||
"09",
|
||||
"10",
|
||||
"11",
|
||||
"12",
|
||||
"13",
|
||||
"14",
|
||||
"15",
|
||||
"16",
|
||||
"17",
|
||||
"18",
|
||||
"19",
|
||||
"20",
|
||||
"21",
|
||||
"22",
|
||||
"23",
|
||||
]:
|
||||
if hour in record["fields"]:
|
||||
if record["fields"]["vacances_zone_b"] == "Hors Vacances":
|
||||
holiday = False
|
||||
if record["vacances_zone_b"] == "Hors Vacances":
|
||||
holiday = False
|
||||
else:
|
||||
holiday = True
|
||||
date = pendulum.parse(record["dateformat"])
|
||||
if not crud.is_comptage_exist(
|
||||
date, record["boucle_num"], unaccounted_table
|
||||
):
|
||||
comptage = models.Comptage(
|
||||
id_boucle=record["boucle_num"],
|
||||
datetime=date,
|
||||
count=record["total"],
|
||||
holiday=holiday,
|
||||
week_day=record["jour_de_la_semaine"],
|
||||
)
|
||||
crud.create_comptage(comptage, unaccounted_table)
|
||||
else:
|
||||
db_comptage = crud.get_comptage_by_date_and_boucle(
|
||||
date, record["boucle_num"], unaccounted_table
|
||||
)
|
||||
log.error(
    # record is the flat field mapping here, so boucle_num is a top-level key
    f"Entry already exist ({record['boucle_num']}, {date})"
)
|
||||
log.debug(
|
||||
f'Check if {db_comptage.count} != {record["total"]}'
|
||||
)
|
||||
if db_comptage.count != record["total"] or force:
|
||||
if force:
|
||||
log.info(f"Update with force {db_comptage.id}")
|
||||
else:
|
||||
holiday = True
|
||||
date = pendulum.parse(record["fields"]["dateformat"]).at(
|
||||
int(hour), 0, 0
|
||||
log.warning(
|
||||
f'Entry {db_comptage.id} have a different count (DB : {db_comptage.count}, data {record["total"]}'
|
||||
)
|
||||
comptage = models.Comptage(
|
||||
id=db_comptage.id,
|
||||
id_boucle=db_comptage.id_boucle,
|
||||
datetime=db_comptage.datetime,
|
||||
count=record["total"],
|
||||
holiday=holiday,
|
||||
week_day=record["jour_de_la_semaine"],
|
||||
)
|
||||
if not crud.is_comptage_exist(
|
||||
date, record["fields"]["boucle_num"], unaccounted_table
|
||||
):
|
||||
comptage = models.Comptage(
|
||||
id_boucle=record["fields"]["boucle_num"],
|
||||
datetime=date,
|
||||
count=record["fields"][hour],
|
||||
holiday=holiday,
|
||||
week_day=record["fields"]["jour_de_la_semaine"],
|
||||
)
|
||||
crud.create_comptage(comptage, unaccounted_table)
|
||||
else:
|
||||
db_comptage = crud.get_comptage_by_date_and_boucle(
|
||||
date, record["fields"]["boucle_num"], unaccounted_table
|
||||
)
|
||||
log.error(
|
||||
f"Entry already exist ({record['fields']['boucle_num']}, {date})"
|
||||
)
|
||||
log.debug(
|
||||
f'Check if {db_comptage.count} != {record["fields"][hour]}'
|
||||
)
|
||||
if db_comptage.count != record["fields"][hour] or force:
|
||||
if force:
|
||||
log.info(f"Update with force {db_comptage.id}")
|
||||
else:
|
||||
log.warning(
|
||||
f'Entry {db_comptage.id} have a different count (DB : {db_comptage.count}, data {record["field"][hour]}'
|
||||
)
|
||||
comptage = models.Comptage(
|
||||
id=db_comptage.id,
|
||||
id_boucle=db_comptage.id_boucle,
|
||||
datetime=db_comptage.datetime,
|
||||
count=record["fields"][hour],
|
||||
holiday=holiday,
|
||||
week_day=record["fields"]["jour_de_la_semaine"],
|
||||
)
|
||||
crud.update_comptage(comptage, unaccounted_table)
|
||||
else:
|
||||
log.info(f"Skip {db_comptage.id}, already exist")
|
||||
crud.update_comptage(comptage, unaccounted_table)
|
||||
else:
|
||||
log.info(f"Skip {db_comptage.id}, already exist")
|
||||
# else:
|
||||
# log.error(f'Boucle {record["fields"]["boucle_num"]} not exist')
|
||||
else:
|
||||
log.error(
|
||||
f'{hour} is not present for {record["fields"]["boucle_num"]}'
|
||||
)
|
||||
|
|
|
@ -2,7 +2,7 @@ from sqlalchemy import (
|
|||
Column,
|
||||
Integer,
|
||||
String,
|
||||
DateTime,
|
||||
Date,
|
||||
ForeignKey,
|
||||
func,
|
||||
Boolean,
|
||||
|
@ -26,7 +26,7 @@ class Comptage(Base):
|
|||
|
||||
id = Column(Integer, primary_key=True)
|
||||
id_boucle = Column(Integer, ForeignKey("boucles.id"))
|
||||
datetime = Column("date", DateTime(timezone=True), default=func.now())
|
||||
datetime = Column("date", Date(), default=func.now())
|
||||
week_day = Column(Integer)
|
||||
holiday = Column(Boolean)
|
||||
count = Column(Integer)
|
||||
|
@ -37,7 +37,7 @@ class unaccounted_Comptage(Base):
|
|||
|
||||
id = Column(Integer, primary_key=True)
|
||||
id_boucle = Column(Integer)
|
||||
datetime = Column("date", DateTime(timezone=True), default=func.now())
|
||||
datetime = Column("date", Date(), default=func.now())
|
||||
week_day = Column(Integer)
|
||||
holiday = Column(Boolean)
|
||||
count = Column(Integer)
|
||||
|
|
|
@ -12,8 +12,8 @@ class ComptageBase(BaseModel):
|
|||
id_boucle: int
|
||||
datetime: datetime
|
||||
count: int
|
||||
week_day = int
|
||||
holiday = bool
|
||||
week_day: int
|
||||
holiday: bool
|
||||
|
||||
|
||||
class ComptageCreate(ComptageBase):
|
||||
|
|
Loading…
Reference in a new issue