PK!腒蕁CCDla_businesses/.git/objects/08/0f34a407598181e9783debd40efee85ca37d4cxK噬OR05d垙/K-*翁蠇廤癠P7󖀅P玮J+氏U兴I孫*-翁K-.N-V忍-/*Q"隤K!: Dla_businesses/.git/objects/4b/825dc642cb6eb9a060e54bf8d69288fbee4904x+)JMU0` ,PK!匤矦**Dla_businesses/.git/objects/58/1c55e609633b3214f495b6a7f4a92a8f01908ex昗醤6 揎<螩PK蒇v貈迱kr[pmR.啳+ 艗藫滏rE遾莙诨mA佢2E蓮2桲x筝弌臼6旞阼^齘矀3 鳺騠3#L词M/Ur%3<[B巾_q峩锅吧泤rg>佦圃猟J媐逪(5-!4颿懖*7鶺憲铯0c刯翦r凳奤疻?KΤ銪i厄s 虬2澖!Bl#Fh蚔 z桔%勦瓖)貔z蛡%欃RX櫇=db~Pj}>U霯森3KT鞪庣客.纾弊粯_]_N|9綁H5梭xY楝@Ya7z脚鋌2嬾"J$3mt}陪谹插梧$蛂z璂谤3#唯I噾那L沊~隘鑜E +(蝱?%t) 孲赶厎膆(6&揀5BT8,GIf* 変Bw奺Z(;`縪3Y娐o鼳';H鳞.e5蝑2臑浰d呸 密Y拿鲺woj'i郎朻玃衉襳gL揇Y慗?鮪貎噼 e72姹c錝9E跁0*囿比%沱富`Ph脵,0~UIU綾爹V6&Y屩!cH癊烊荎4K肴笹P炩%0!x寘鄵ん踼倔.:熈M4ZD0E镛22n概玳gD?27]璥駳媏凋= F椞!2 汩W彠v裺q田B=Lm[h憨#已f尤薽A☆爈冏E鰠掖6t错鞮&r1{硠齓x痃佷- 柆澝 ,? b#<O+pZA毘Y靡d瞥瑇A5璩A啭蜒詥黖舸. 竉虌nNX軨g舆U楃y [M2 t檪Y q@こ4刂奯%v$u.$!栛/煱黁晠+t入s/;0y %饜嚟.最鞄赩昀Ae捓晖> 煕)=鶣@{憣7ミ& =? 4e2划Rz跹:l,訴慈Ebb姿安 +C性d#芅囘慨,⿲LSK柁呠"ТFGp孥蚔 毻6':dqo诫F奢殞裵F諬"囫`熍<澝c廄墽'绬北争讕 褜粨贩7淤滏f+缮N苚G皲迿Uh溚栥l/w釈$:摩兠1I^鹛籄M柚rG>:M;d1Fガ宮D~'夥兞 4@&庯魳#C蘜.>綿P01m<熜簼r算冻Z撢Jty怯M纳骙握欀浪, 骐j﹨ 燮爿wR ~彮G>9榶 銶W叴%^馊埣k橞b覾m堯~B蓿H]2>殻D浧0唤 I)j> <7焍'呚2谩洽E嘓Hno'S<g9&w耠埃 ШDeドdz8醝菝枥;k銶礃L"嵡 , 镛簔鏬L_/L痲銆嗍乿语柢DJ舥x#;竜-sx%M7方gc#XM銟譓qXB牗7pGヂ唓鱢抙p伱曚K;宎gu`覾叧hjo唙{塕Mf7t惫>]垭T壝@1桰7";cC襛躇寤 儘;-`溓镜'ye麻旔%.?磻Jo#笀櫺鬯氉C1锟溓6魓.1攘J玺僽ge潈UC8,榁#NQlV須持u坿;焂瞠u9脹鎘僎o冒Bj62jテ邑浦jd鋩}醂{岵PH:p螱{瀢e雫hw厍 -摵U1鱞窋L)蕈敧J觶衐菇玏z1e1簜眼"殞岱i+\D嬎計P墕珀8&Z奵q缐c)=кa蛀G+fPK!^^Dla_businesses/.git/objects/b5/68c545e552ffae1d960931ecd2d28c13f23cd9x+)JMU0f040031Q垙咸,墢+╠噜7Y儇脉/b@曞$'g妤冊F葎>鉒62u垓/+碟'灨" PK!9U蟽Dla_businesses/.git/objects/d8/ef049d6e7889551f31817553b1835cfd09cf86x澩A 0卆9臷@$6.3纳J犍瓃qt@o}' 炆櫆 厯I3趌,庵Ф=.浱疣〦8'y]隬N荜惾y玻C8闝k惦蹇Z輺襅溼7^邆<PK!F麇S55la_businesses/__init__.py__version__ = '0.1.1' from .la_businesses import *PK!鮦jla_businesses/la_businesses.py"""Save the latest database of businesses in Los Angeles as CSV and KML. # Description This script downloads and processes the listing of all active businesses currently registered with the City of Los Angeles Office of Finance. An 'active' business is defined as a registered business whose owner has not notified the Office of Finance of a cease of business operations. Update Interval: Monthly. Data source: https://data.lacity.org/A-Prosperous-City/Listing-of-Active-Businesses/6rrh-rzua This script fetches the data and saves it locally as a CSV file. It also selects a subset of businesses with operation starting date within the last NDAYS days (default 30) and saves this as a separate CSV file. Finally, it creates and saves a KML file from the subset, useful for importing into Google Maps or similar software to visualize the distribution of recent businesses opened in the Los Angeles area. # Installation Install with pip. The package installs as a command-line script. ``` pip install la-businesses ``` # Usage Run from the command line (it installs as as script). All downloaded and generated files will be stored in a directory `files` inside the current working directory. ``` usage: la-businesses [-h] [-u] [-d NDAYS] optional arguments: -h, --help show this help message and exit -u, --update update data (default: False) -d NDAYS, --days NDAYS started since NDAYS days ago (default: 30) ``` # Known issues ## Locations with missing coordinates are omitted from KML file The script relies on coordinate data already provided in the downloaded dataset. Some businesses contain addresses but no coordinates; in these cases, the business is ignored when creating the KML (but is included in any saved CSV file). Future implementations should include a function to look up location coordinates from a given address (e.g., using the Open Street Maps API). ## Locations with no DBA name simply show NaN in the KML file The script could use better handling of business name / DBA combinations to omit NaN from KML when it does not have a business name. ## No phone numbers The data does not include any phone or email contact information; merging this dataset with one that includes contact information would be more useful for market research. """ # ----------------------------------------------------------------------------# # LIBRARIES # ----------------------------------------------------------------------------# from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from pathlib import Path import logging import datetime as dt import pytz # for timezone handling import pandas as pd import requests import simplekml # ----------------------------------------------------------------------------# # CONSTANTS AND CONFIGURATION # ----------------------------------------------------------------------------# logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") URL = "https://data.lacity.org/api/views/6rrh-rzua/rows.csv?accessType=DOWNLOAD" COMPLETE_LIST = "all_businesses.csv" # full database of businesses RECENT_LIST = "recent_businesses.csv" # subset of recent businesses DEFAULT_NDAYS = 30 # select businesses opened since this many days ago WRITE_CHUNK_SIZE = 1024 # bytes OUTPUT_DIR = Path.cwd() / 'files' # save all files here OUTPUT_DIR.mkdir(parents=True, exist_ok=True) # make the dir if not exist # ----------------------------------------------------------------------------# # CORE FUNCTIONS # ----------------------------------------------------------------------------# def get_business_list(): """Download latest businesses database.""" response = requests.get(URL, stream=True) # Throw an error for bad status codes response.raise_for_status() with open(OUTPUT_DIR / COMPLETE_LIST, "wb") as handle: for block in response.iter_content(DEFAULT_NDAYS): handle.write(block) logging.info(f"Saved complete business list as {COMPLETE_LIST}.") return OUTPUT_DIR / COMPLETE_LIST def load_business_list(file=None, update=False): """Load (optionally identified) database from file or download it first.""" business_list_file = OUTPUT_DIR / COMPLETE_LIST if file: logging.info(f"Loading business list {file.name} ...") df = pd.read_csv(file) df["LOCATION START DATE"] = pd.to_datetime(df["LOCATION START DATE"]) df["LOCATION END DATE"] = pd.to_datetime(df["LOCATION END DATE"]) logging.debug("Converted dates") return df if update or not business_list_file.exists(): logging.info("Downloading database of businesses ...") get_business_list() logging.info( f"Loading all businesses ...\n" \ f"Using cached data from {last_mod(business_list_file)}. " \ f"Use -u flag to update.") df = pd.read_csv(business_list_file) df["LOCATION START DATE"] = pd.to_datetime(df["LOCATION START DATE"]) df["LOCATION END DATE"] = pd.to_datetime(df["LOCATION END DATE"]) logging.debug("Converted dates") return df def last_mod(file): """Returns a string of the last modified time of a Path() in local timezone""" fmt = "%d %b %Y at %I:%M %p %Z" # time format return pytz.utc.localize(dt.datetime.utcfromtimestamp( file.stat().st_mtime)).astimezone().strftime(fmt) def select_recent(df, outfile=None, ndays=DEFAULT_NDAYS): logging.info(f"Selecting businesses starting {ndays} days ago or later ...") cutoff_date = dt.datetime.now() - dt.timedelta(days=ndays) df = df[df["LOCATION START DATE"] > cutoff_date] logging.debug(f"Selected recent since {cutoff_date.date()}: {len(df)} items") df = df.sort_values(by="LOCATION START DATE", ascending=False) logging.debug("Sorted by start date") output_filename = outfile or RECENT_LIST output_file = OUTPUT_DIR / output_filename df.to_csv(output_file, index=False) logging.info(f"Saved {len(df)} recent businesses to {output_file}.") return df def df_to_kml(df, outfile=None): """Make a KML file from pd.DataFrame of addresses""" df = df.dropna(subset=["LOCATION"]) df = df.reset_index(drop=True) logging.debug("Ignoring places with no lat-long") kml = simplekml.Kml() for id, row in df.iterrows(): long, lat = eval(row["LOCATION"]) kml.newpoint( name=str(row["BUSINESS NAME"]) + "\n" + str(row["DBA NAME"]), description=", ".join( [row["STREET ADDRESS"], row["CITY"], row["ZIP CODE"]] ), coords=[(lat, long)], ) output_filename = outfile or "recent_businesses" output_file = OUTPUT_DIR / (output_filename + ".kml") kml.save(output_file) logging.debug("made points") logging.info("Created KML file " + str(output_file)) # ----------------------------------------------------------------------------# # USER INTERFACE FUNCTIONS # ----------------------------------------------------------------------------# def get_parser(): parser = ArgumentParser( description=__doc__, formatter_class=ArgumentDefaultsHelpFormatter ) parser.add_argument( "-u", "--update", action="store_true", dest="update", help="update data" ) parser.add_argument( "-d", "--days", action="store", dest="ndays", type=int, default=DEFAULT_NDAYS, help="started since NDAYS days ago", ) return parser def main(): try: args = get_parser().parse_args() df = load_business_list(update=args.update) df = select_recent(df, ndays=args.ndays) df_to_kml(df) except KeyboardInterrupt: logging.error("!!! PROGRAM ABORTED WITH CTRL-C. !!!\n") if __name__ == "__main__": main() PK!H鄹~D-4.la_businesses-0.1.1.dist-info/entry_points.txt婲蜗+蜗I/N.,()庡蔍訫*-翁K-.N-锻I孏瓞r3蟾PK!H腾絋U#la_businesses-0.1.1.dist-info/WHEEL 蔄 薪ю#Z;/"⒅d&F[x鹺雡蹳Z硃戳y3仭〾F歷隴\fi4WZ^E蒰碝_-]#0(q7PK!H棧r4 &la_businesses-0.1.1.dist-info/METADATA昖遫6~鏮q@枩莺篿17A 塖詉嘺(F%"⿷T\鳢遷れ豬^飵w遾鱍W*萊檥U蝛k&t姍l談欇鮚褰騜cp耫薹璽 湾儮P+q;=簱6ti=MM,ェ御W挦\黫[晆猜盻f畴縡釸蕏<:>灾峥4絡韬ōf獣簷P~o愣遢及淼S>)8=:y堄FzZ!靝M&t=?9麪J^泗貊幩'g+'踁泭.ォz$霄$䎱眢讖 焛& @调A~*?澿鳁黗'肬fG嚳錹F'鴠髮櫭M忩C焉3V^穄n蹎骳刢3 ЩdеeBv酬小牼1鑠_讠馀陥/h薡茉赟Z{4V>r/慟葐揹訍,锥H%娹9ぱ琍Yk逍触u?誥盼勰籢,蠰~鶴i 晸z檪寇D'䲢LV箎榔fY[&(G5瑢 $鹋-/c ?嫛B=矟r捬馠鐺)Rt屳矾w莠r!0^鋗 郶囆x󉣇睝焦u誼殎mm3Fa|櫊天"浧J强:Wg頖/臢k*跣 >!v萩N L毑J囵,/t0z-y燷/恖鏤x"盙洑閎玞菁 紿I櫇M瀋k迮z"&旭窿V>佊帣x誌茿Sb鋺閳.渂檴"鵍V爃J g踴l蕐D絎嬀u剻)Gm偉?鄎%;粑 寣nga)岩凍絣魪$%:圄]NAS6&冑`礑譅E皻&&咵偝覿N7p韉q详⒂顎 騒f Z寴贩巏'撍粅弜鈙o!翾 hwN[揾?)噳懐P)w⿲3`t嚧倁鋄B 琖t峮]&4嚵'贝>gm澢飹漥锜啹灴誓猳B(JEP?DY=,玌友吻譾欒鱖Dfü*鮙鲮俪O#减災IZ觴B1-j熭唽貦>>0AP徢Q Y侪8槆X8麟pH槆赾'(/+-堵ZWb>Bb賄囏n髗.搄蚛5[畠:4`i筨 衑阞$㧟镔s谕q遫s縺繧n{Y呼=軳痦蔣 硼b[眜e"仏2i止H6鉠8&[櫌閲 E ).滮貒h膡)1@囨鱉滚G)釦ck镅(宷8繁峹J獱紱"iO錟"砃 @筻2*唆御>⺳-8g甬杗霼"鏛沃?桌"a眉≌|q櫜 𣁞A裤*馍⊿0G)扗5_*]5瘦p:f:简(2}{嚪蕵]ば0簱l±=/1╮觶1tUQ{1發竎 嗆勽爀詡>郦/F1斫挳üPK!H1駭V$la_businesses-0.1.1.dist-info/RECORD呇K摙:礼齶h&Y8: 舦C厫姞痖L枕捂;$节⒁謏踸M疦鱖5xL$r▍莙τm存TI靍D96棃贻3B俬?f]癉)W栫桷;碉{蹮峅k<=0颎4I{袻1俆蕋*$`@S𻘄7┲D颌i赐繂6囶臣籵O嬿級嚛X褦鳡3 )F"h胜gHr\胯昚wK褣艉0硿^禪Q剾載绐-绮鹹眦縦[彮9&_)1(Rd屧0螮0*C b儼誓 G-g1塋囃$墸x濞 /輧栲簞.p臈t苳"2=バ噻焮斺艄6U&B蝆ty矖n赺軫拲兺~谗緛1榐互c滨函蝑z絮y篞籿a41荻7穷鄍@袩Y剂缢 鄹Ee隇畾K洔辎j靃骽^蘎滎笆l唗鈴nq_ v駕>掔圎珅菓=7墖a8~昽[q潪鉔|TI1/蕑t踈Bm'獢Q價籰陪7錓鶀?騙袼p*帄鳯u<翾"r杆漆0蹲