Evade TLS fingerprinting

This commit is contained in:
Alex Fox
2025-05-25 00:04:48 +00:00
parent 216a8f7c3b
commit 4e4f37c9fc

View File

@@ -2,7 +2,7 @@ import os
from io import StringIO from io import StringIO
import pandas as pd import pandas as pd
import requests import curl_cffi
import dagster as dg import dagster as dg
import datetime import datetime
@@ -12,7 +12,7 @@ from sqlalchemy import create_engine
IE_ENDPOINT = "https://publicreporting.elections.ny.gov/IndependentExpenditure" IE_ENDPOINT = "https://publicreporting.elections.ny.gov/IndependentExpenditure"
def get_cookies(s: requests.Session, from_date: datetime.date, to_date: datetime.date): def get_cookies(s: curl_cffi.Session, from_date: datetime.date, to_date: datetime.date):
"""Fetch cookies into session""" """Fetch cookies into session"""
cookie_postdata = { cookie_postdata = {
'lstUCOfficeType': '0', 'lstUCOfficeType': '0',
@@ -31,7 +31,11 @@ def get_cookies(s: requests.Session, from_date: datetime.date, to_date: datetime
'ddlSearchBy': 'All' 'ddlSearchBy': 'All'
} }
return s.post(f"{IE_ENDPOINT}/BindIndExpData/", json=cookie_postdata) return s.post(
f"{IE_ENDPOINT}/BindIndExpData/",
data=cookie_postdata,
impersonate="chrome",
)
def gen_ie_query(from_date: datetime.date, to_date: datetime.date): def gen_ie_query(from_date: datetime.date, to_date: datetime.date):
"""Fill in query parameters for independent expenditures and date range""" """Fill in query parameters for independent expenditures and date range"""
@@ -58,7 +62,7 @@ def gen_ie_query(from_date: datetime.date, to_date: datetime.date):
} }
NY_DAILY_PARTITION = dg.DailyPartitionsDefinition( NY_DAILY_PARTITION = dg.DailyPartitionsDefinition(
start_date="2025-01-01", start_date="2024-01-01",
timezone="America/New_York", timezone="America/New_York",
) )
@@ -72,19 +76,20 @@ def fetch_expenditures(context: dg.AssetExecutionContext) -> dg.MaterializeResul
end_date = pendulum.parse(context.partition_key).subtract(days=1) end_date = pendulum.parse(context.partition_key).subtract(days=1)
start_date = end_date.subtract(days=1) start_date = end_date.subtract(days=1)
with requests.Session() as s: with curl_cffi.Session() as s:
res = get_cookies(s, start_date, end_date) res = get_cookies(s, start_date, end_date)
if not res.json()["aaData"]: if not res.json()["aaData"]:
return None return None
req = s.get(f"{IE_ENDPOINT}/IndependentExpenditure", req = s.get(f"{IE_ENDPOINT}/IndependentExpenditure",
params=gen_ie_query(start_date, end_date), params=gen_ie_query(start_date, end_date),
impersonate="chrome",
) )
df = pd.read_csv(StringIO(req.text), index_col=False) df = pd.read_csv(StringIO(req.text), index_col=False)
engine = create_engine("postgresql://superset:PASSWORD@IP_ADDR/superset") engine = create_engine("postgresql://superset:PASSWORD@IP_ADDR/superset")
df.to_sql( df.to_sql(
"independent_expenditures_raw", "independent_expenditures_raw_redo",
con=engine, con=engine,
if_exists="append", if_exists="append",
) )