evade tls fingerprinting
This commit is contained in:
@@ -2,7 +2,7 @@ import os
|
|||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import requests
|
import curl_cffi
|
||||||
import dagster as dg
|
import dagster as dg
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
@@ -12,7 +12,7 @@ from sqlalchemy import create_engine
|
|||||||
|
|
||||||
IE_ENDPOINT = "https://publicreporting.elections.ny.gov/IndependentExpenditure"
|
IE_ENDPOINT = "https://publicreporting.elections.ny.gov/IndependentExpenditure"
|
||||||
|
|
||||||
def get_cookies(s: requests.Session, from_date: datetime.date, to_date: datetime.date):
|
def get_cookies(s: curl_cffi.Session, from_date: datetime.date, to_date: datetime.date):
|
||||||
"""Fetch cookies into session"""
|
"""Fetch cookies into session"""
|
||||||
cookie_postdata = {
|
cookie_postdata = {
|
||||||
'lstUCOfficeType': '0',
|
'lstUCOfficeType': '0',
|
||||||
@@ -31,7 +31,11 @@ def get_cookies(s: requests.Session, from_date: datetime.date, to_date: datetime
|
|||||||
'ddlSearchBy': 'All'
|
'ddlSearchBy': 'All'
|
||||||
}
|
}
|
||||||
|
|
||||||
return s.post(f"{IE_ENDPOINT}/BindIndExpData/", json=cookie_postdata)
|
return s.post(
|
||||||
|
f"{IE_ENDPOINT}/BindIndExpData/",
|
||||||
|
data=cookie_postdata,
|
||||||
|
impersonate="chrome",
|
||||||
|
)
|
||||||
|
|
||||||
def gen_ie_query(from_date: datetime.date, to_date: datetime.date):
|
def gen_ie_query(from_date: datetime.date, to_date: datetime.date):
|
||||||
"""Fill in query parameters for independent expenditures and date range"""
|
"""Fill in query parameters for independent expenditures and date range"""
|
||||||
@@ -58,7 +62,7 @@ def gen_ie_query(from_date: datetime.date, to_date: datetime.date):
|
|||||||
}
|
}
|
||||||
|
|
||||||
NY_DAILY_PARTITION = dg.DailyPartitionsDefinition(
|
NY_DAILY_PARTITION = dg.DailyPartitionsDefinition(
|
||||||
start_date="2025-01-01",
|
start_date="2024-01-01",
|
||||||
timezone="America/New_York",
|
timezone="America/New_York",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -72,19 +76,20 @@ def fetch_expenditures(context: dg.AssetExecutionContext) -> dg.MaterializeResul
|
|||||||
end_date = pendulum.parse(context.partition_key).subtract(days=1)
|
end_date = pendulum.parse(context.partition_key).subtract(days=1)
|
||||||
|
|
||||||
start_date = end_date.subtract(days=1)
|
start_date = end_date.subtract(days=1)
|
||||||
with requests.Session() as s:
|
with curl_cffi.Session() as s:
|
||||||
res = get_cookies(s, start_date, end_date)
|
res = get_cookies(s, start_date, end_date)
|
||||||
if not res.json()["aaData"]:
|
if not res.json()["aaData"]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
req = s.get(f"{IE_ENDPOINT}/IndependentExpenditure",
|
req = s.get(f"{IE_ENDPOINT}/IndependentExpenditure",
|
||||||
params=gen_ie_query(start_date, end_date),
|
params=gen_ie_query(start_date, end_date),
|
||||||
|
impersonate="chrome",
|
||||||
)
|
)
|
||||||
df = pd.read_csv(StringIO(req.text), index_col=False)
|
df = pd.read_csv(StringIO(req.text), index_col=False)
|
||||||
|
|
||||||
engine = create_engine("postgresql://superset:PASSWORD@IP_ADDR/superset")
|
engine = create_engine("postgresql://superset:PASSWORD@IP_ADDR/superset")
|
||||||
df.to_sql(
|
df.to_sql(
|
||||||
"independent_expenditures_raw",
|
"independent_expenditures_raw_redo",
|
||||||
con=engine,
|
con=engine,
|
||||||
if_exists="append",
|
if_exists="append",
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user