if X.shape[0] > 12000: model = MiniBatchKMeans( n_clusters=n_clusters, init=init, n_init=n_init, max_iter=max_iter, tol=tol, verbose=verbose, random_state=random_state) else: model = KMeans( n_clusters=n_clusters, init='k-means++', n_init=n_init, max_iter=max_iter, tol=tol, precompute_distances=precompute_distances, verbose=0, random_state=random_state, copy_x=copy_x, n_jobs=n_jobs, algorithm=algorithm) model.fit(X) return model def affinity_propagation(X, damping=0.8, preference=-1000, max_iter=500, convergence_iter=15, copy=True, affinity='euclidean', verbose=False, **kwargs): """Clustering with Affinity Propagation. Parameters ---------- X : array-like n x k attribute data preference : array-like, shape (n_samples,) or float, optional, default: -1000 The preference parameter passed to scikit-learn's affinity propagation algorithm damping: float, optional, default: 0.8 The damping parameter passed to scikit-learn's affinity propagation algorithm max_iter : int, optional, default: 500 Maximum number of iterations Returns ------- model: sklearn AffinityPropagation instance """ model = AffinityPropagation( preference=preference, damping=damping, max_iter=max_iter, convergence_iter=convergence_iter, copy=copy, affinity=affinity, verbose=verbose) model.fit(X) return model def spectral(X, n_clusters, eigen_solver=None, random_state=None, n_init=10, gamma=1.0, affinity='rbf', n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1, kernel_params=None, n_jobs=-1, **kwargs): """Spectral clustering. Parameters ---------- X : array-like n x k attribute data n_clusters : type The number of clusters to form as well as the number of centroids to generate. eigen_solver : type Description of parameter `eigen_solver` (the default is None). random_state : type Description of parameter `random_state` (the default is None). n_init : type Description of parameter `n_init` (the default is 10). gamma : type Description of parameter `gamma` (the default is 1.0). affinity : type Description of parameter `affinity` (the default is 'rbf'). n_neighbors : type Description of parameter `n_neighbors` (the default is 10). eigen_tol : type Description of parameter `eigen_tol` (the default is 0.0). assign_labels : type Description of parameter `assign_labels` (the default is 'kmeans'). degree : type Description of parameter `degree` (the default is 3). coef0 : type Description of parameter `coef0` (the default is 1). kernel_params : type Description of parameter `kernel_params` (the default is None). n_jobs : type Description of parameter `n_jobs` (the default is -1). **kwargs : type Description of parameter `**kwargs`. Returns ------- model: sklearn SpectralClustering instance """ model = SpectralClustering( n_clusters=n_clusters, eigen_solver=eigen_solver, random_state=random_state, n_init=n_init, gamma=gamma, affinity=affinity, n_neighbors=n_neighbors, eigen_tol=eigen_tol, assign_labels=assign_labels, degree=degree, coef0=coef0, kernel_params=kernel_params, n_jobs=n_jobs) model.fit(X) return model def gaussian_mixture(X, n_clusters=5, covariance_type="full", best_model=False, max_clusters=10, random_state=None, **kwargs): """Clustering with Gaussian Mixture Model Parameters ---------- X : array-like n x k attribute data n_clusters : int, optional, default: 5 The number of clusters to form.
covariance_type: str, optional, default: "full"" The covariance parameter passed to scikit-learn's GaussianMixture algorithm best_model: bool, optional, default: False Option for finding endogenous K according to Bayesian Information Criterion max_clusters: int, optional, default:10 The max number of clusters to test if using `best_model` option random_state: int, optional, default: None The seed used to generate replicable results Returns ------- model: sklearn GaussianMixture instance """ if random_state is None: warn("Note: Gaussian Mixture Clustering is probabilistic--\ cluster labels may be different for different runs. If you need consistency,\ you should set the `random_state` parameter") if best_model is True: # selection routine from # https://plot.ly/scikit-learn/plot-gmm-selection/ lowest_bic = np.infty bic = [] maxn = max_clusters + 1 n_components_range = range(1, maxn) cv_types = ['spherical', 'tied', 'diag', 'full'] for cv_type in cv_types: for n_components in n_components_range: # Fit a Gaussian mixture with EM gmm = GaussianMixture( n_components=n_components, random_state=random_state, covariance_type=cv_type) gmm.fit(X) bic.append(gmm.bic(X)) if bic[-1] < lowest_bic: lowest_bic = bic[-1] best_gmm = gmm bic = np.array(bic) model = best_gmm else: model = GaussianMixture( n_components=n_clusters, random_state=random_state, covariance_type=covariance_type) model.fit(X) model.labels_ = model.predict(X) return model def hdbscan(X, min_cluster_size=5, gen_min_span_tree=True, **kwargs): """Clustering with Hierarchical DBSCAN Parameters ---------- X : array-like n x k attribute data min_cluster_size : int, default: 5 the minimum number of points necessary to generate a cluster gen_min_span_tree : bool Description of parameter `gen_min_span_tree` (the default is True). Returns ------- model: hdbscan HDBSCAN instance """ model = HDBSCAN(min_cluster_size=min_cluster_size) model.fit(X) return model # Spatially Explicit/Encouraged Methods def ward_spatial(X, w, n_clusters=5, **kwargs): """Agglomerative clustering using Ward linkage with a spatial connectivity constraint Parameters ---------- X : array-like n x k attribute data w : PySAL W instance spatial weights matrix n_clusters : int, optional, default: 5 The number of clusters to form. Returns ------- model: sklearn AgglomerativeClustering instance """ model = AgglomerativeClustering( n_clusters=n_clusters, connectivity=w.sparse, linkage='ward') model.fit(X) return model def spenc(X, w, n_clusters=5, gamma=1, **kwargs): """Spatially encouraged spectral clustering :cite:`wolf2018` Parameters ---------- X : array-like n x k attribute data w : PySAL W instance spatial weights matrix n_clusters : int, optional, default: 5 The number of clusters to form. gamma : int, default:1 TODO. Returns ------- model: spenc SPENC instance """ model = SPENC(n_clusters=n_clusters, gamma=gamma) model.fit(X, w.sparse) return model def skater(X, w, n_clusters=5, floor=-np.inf, trace=False, islands='increase', **kwargs): """SKATER spatial clustering algorithm. Parameters ---------- X : array-like n x k attribute data w : PySAL W instance spatial weights matrix n_clusters : int, optional, default: 5 The number of clusters to form. floor : type TODO. trace : type TODO. islands : type TODO. 
Returns ------- model: skater SKATER instance """ model = Spanning_Forest() model.fit(n_clusters, w, data=X.values, quorum=floor, trace=trace) model.labels_ = model.current_labels_ return model def azp(X, w, n_clusters=5, **kwargs): """AZP clustering algorithm Parameters ---------- X : array-like n x k attribute data w : PySAL W instance spatial weights matrix n_clusters : int, optional, default: 5 The number of clusters to form. Returns ------- model: region AZP instance """ model = AZP() model.fit_from_w(attr=X.values, w=w, n_regions=n_clusters) return model def max_p(X, w, threshold_variable="count", threshold=10, **kwargs): """Max-p clustering algorithm :cite:`Duque2012` Parameters ---------- X : array-like n x k attribute data w : PySAL W instance spatial weights matrix threshold_variable : str, default:"count" attribute variable to use as floor when calculate threshold : int, default:10 integer that defines the upper limit of a variable that can be grouped into a single region Returns ------- model: region MaxPRegionsHeu instance """ model = MaxPRegionsHeu() model.fit_from_w(w, X.values, threshold_variable, threshold) return model PKrNfFC??geosnap/analyze/dynamics.py"""Transition and sequence analysis of neighborhood change.""" import itertools from itertools import combinations from giddy.markov import Markov, Spatial_Markov import numpy as np import scipy.spatial.distance as d from libpysal.weights.contiguity import Queen, Rook from libpysal.weights.distance import KNN, Kernel class Transition(object): """ (Spatial) Markov approach to transitional dynamics of neighborhoods. Parameters ---------- dataset : geosnap.Dataset geosnap dataset object with column defining neighborhood clusters w_type : libpysal spatial weights type ("rook", "queen", "knn" or "kernel") spatial weights object. w_kwds : dict dictionary with options to be passed to libpysal.weights generator permutations : int, optional number of permutations for use in randomization based inference (the default is 0). cluster_type : string cluster algorithm (specification) used to generate neighborhood types, such as "ward", "kmeans", etc. Attributes ---------- p : matrix (k, k), transition probability matrix for a-spatial Markov. transitions : matrix (k, k), counts of transitions between each neighborhood type i and j for a-spatial Markov. T : matrix (k, k, k), counts of transitions for each conditional Markov. T[0] is the matrix of transitions for observations with categorical spatial lags of 0; T[k-1] is the transitions for the observations with lags of k-1. P : matrix (k, k, k), transition probability matrix for spatial Markov. First dimension is the conditioned on the categorical spatial lag. 
""" def __init__(self, dataset, w_type, w_kwds=None, permutations=0, cluster_type=None): y = dataset.census.copy().reset_index() y = y[['geoid', 'year', cluster_type]] y = y.groupby(['geoid', 'year']).first().unstack() y = y.dropna() tracts = dataset.tracts.copy().merge( y.reset_index(), on='geoid', how='right') w_dict = {'rook': Rook, 'queen': Queen, 'knn': KNN, 'kernel': Kernel} w = w_dict[w_type].from_dataframe(tracts) y = y.astype(int) sm = Spatial_Markov( y, w, permutations=permutations, discrete=True, variable_name=cluster_type) self.p = sm.p self.transitions = sm.transitions self.P = sm.P self.T = sm.T self.summary = sm.summary self.cluster_type = cluster_type # keep the spatial markov instance here in case that users want to # estimate steady state distribution etc self.sm = sm class Sequence(object): """ Pairwise sequence analysis. Dynamic programming if optimal matching. Parameters ---------- y : array one row per sequence of neighborhood types for each spatial unit. Sequences could be of varying lengths. subs_mat : array (k,k), substitution cost matrix. Should be hollow ( 0 cost between the same type), symmetric and non-negative. dist_type : string "hamming": hamming distance (substitution only and its cost is constant 1) from sklearn.metrics; "markov": utilize empirical transition probabilities to define substitution costs; "interval": differences between states are used to define substitution costs, and indel=k-1; "arbitrary": arbitrary distance if there is not a strong theory guidance: substitution=0.5, indel=1. "tran": transition-oriented optimal matching. Sequence of transitions. Based on :cite:`Biemann:2011`. indel : float insertion/deletion cost. cluster_type : string cluster algorithm (specification) used to generate neighborhood types, such as "ward", "kmeans", etc. Attributes ---------- seq_dis_mat : array (n,n), distance/dissimilarity matrix for each pair of sequences classes : array (k, ), unique classes k : int number of unique classes label_dict : dict dictionary - {input label: int value between 0 and k-1 (k is the number of unique classes for the pooled data)} Examples -------- >>> import numpy as np 1. Testing on unequal string sequences 1.1 substitution cost matrix and indel cost are not given, and will be generated based on the distance type "interval" >>> seq1 = 'ACGGTAG' >>> seq2 = 'CCTAAG' >>> seq3 = 'CCTAAGC' >>> seqAna = Sequence([seq1,seq2,seq3],dist_type="interval") >>> seqAna.k 4 >>> seqAna.classes array(['A', 'C', 'G', 'T'], dtype='>> seqAna.subs_mat array([[0., 1., 2., 3.], [1., 0., 1., 2.], [2., 1., 0., 1.], [3., 2., 1., 0.]]) >>> seqAna.seq_dis_mat array([[ 0., 7., 10.], [ 7., 0., 3.], [10., 3., 0.]]) 1.2 User-defined substitution cost matrix and indel cost >>> subs_mat = np.array([[0, 0.76, 0.29, 0.05],[0.30, 0, 0.40, 0.60],[0.16, 0.61, 0, 0.26],[0.38, 0.20, 0.12, 0]]) >>> indel = subs_mat.max() >>> seqAna = Sequence([seq1,seq2,seq3], subs_mat=subs_mat, indel=indel) >>> seqAna.seq_dis_mat array([[0. , 1.94, 2.46], [1.94, 0. , 0.76], [2.46, 0.76, 0. ]]) 1.3 Calculating "hamming" distance will fail on unequal sequences >>> seqAna = Sequence([seq1,seq2,seq3], dist_type="hamming") Traceback (most recent call last): ValueError: hamming distance cannot be calculated for sequences of unequal lengths! 2. 
Testing on equal string sequences >>> seq1 = 'ACGGTAG' >>> seq2 = 'CCTAAGA' >>> seq3 = 'CCTAAGC' 2.1 Calculating "hamming" distance >>> seqAna = Sequence([seq1,seq2,seq3], dist_type="hamming") >>> seqAna.seq_dis_mat array([[0., 6., 6.], [6., 0., 1.], [6., 1., 0.]]) 2.2 User-defined substitution cost matrix and indel cost (distance between different types is always 1 and indel cost is 2) - give the same sequence distance matrix as "hamming" distance >>> subs_mat = np.array([[0., 1., 1., 1.],[1., 0., 1., 1.],[1., 1., 0., 1.],[1., 1., 1., 0.]]) >>> indel = 2 >>> seqAna = Sequence([seq1,seq2,seq3], subs_mat=subs_mat, indel=indel) >>> seqAna.seq_dis_mat array([[0., 6., 6.], [6., 0., 1.], [6., 1., 0.]]) 2.3 User-defined substitution cost matrix and indel cost (distance between different types is always 1 and indel cost is 1) - give a slightly different sequence distance matrix from "hamming" distance since insertion and deletion is happening >>> subs_mat = np.array([[0., 1., 1., 1.],[1., 0., 1., 1.],[1., 1., 0.,1.],[1., 1., 1., 0.]]) >>> indel = 1 >>> seqAna = Sequence([seq1,seq2,seq3], subs_mat=subs_mat, indel=indel) >>> seqAna.seq_dis_mat array([[0., 5., 5.], [5., 0., 1.], [5., 1., 0.]]) 3. Not passing proper parameters will raise an error >>> seqAna = Sequence([seq1,seq2,seq3]) Traceback (most recent call last): ValueError: Please specify a proper `dist_type` or `subs_mat` and `indel` to proceed! >>> seqAna = Sequence([seq1,seq2,seq3], subs_mat=subs_mat) Traceback (most recent call last): ValueError: Please specify a proper `dist_type` or `subs_mat` and `indel` to proceed! >>> seqAna = Sequence([seq1,seq2,seq3], indel=indel) Traceback (most recent call last): ValueError: Please specify a proper `dist_type` or `subs_mat` and `indel` to proceed! """ def __init__(self, y, subs_mat=None, dist_type=None, indel=None, cluster_type=None): y = np.asarray(y) merged = list(itertools.chain.from_iterable(y)) self.classes = np.unique(merged) self.k = len(self.classes) self.n = len(y) self.indel = indel self.subs_mat = subs_mat self.cluster_type = cluster_type self.label_dict = dict(zip(self.classes, range(self.k))) y_int = [] for yi in y: y_int.append(list(map(self.label_dict.get, yi))) y_int = np.array(y_int) if subs_mat is None or indel is None: if dist_type is None: raise ValueError("Please specify a proper `dist_type` or " "`subs_mat` and `indel` to proceed!") else: if dist_type.lower() == "interval": self.indel = self.k - 1 self.subs_mat = np.zeros((self.k, self.k)) for i in range(0, self.k - 1): for j in range(i + 1, self.k): self.subs_mat[i, j] = j - i self.subs_mat[j, i] = j - i self._om_dist(y_int) elif dist_type.lower() == "hamming": if len(y_int.shape) != 2: raise ValueError('hamming distance cannot be calculated for ' 'sequences of unequal lengths!') hamming_dist = d.pdist(y_int, metric='hamming') * y_int.shape[1] self.seq_dis_mat = d.squareform(hamming_dist) elif dist_type.lower() == "arbitrary": self.indel = 1 mat = np.ones((self.k, self.k)) * 0.5 np.fill_diagonal(mat, 0) self.subs_mat = mat self._om_dist(y_int) elif dist_type.lower() == "markov": p = Markov(y_int).p self.indel = 1 mat = (2-(p+p.T))/2 np.fill_diagonal(mat, 0) self.subs_mat = mat self._om_dist(y_int) elif dist_type.lower() == "tran": #sequence of transitions self.indel = 2 y_uni = np.unique(y_int) dict_trans_state = {} trans_list = [] for i, tran in enumerate(itertools.product([-1], y_uni)): trans_list.append(tran) dict_trans_state[tran] = i for i, tran in enumerate(itertools.product(y_uni, y_uni)): trans_list.append(tran) 
dict_trans_state[tran] = i + len(y_uni) subs_mat = np.ones((self.k * (self.k + 1), self.k * (self.k + 1))) np.fill_diagonal(subs_mat, 0) for row in range(self.k ** 2): row_index = row + self.k row_tran = trans_list[row_index] for col in range(self.k ** 2): col_Index = col + self.k col_tran = trans_list[col_Index] if row_tran[0] == row_tran[1]: if col_tran[0] == col_tran[1]: subs_mat[row_index, col_Index] = 0 elif row_tran[0] != row_tran[1]: if col_tran[0] != col_tran[1]: subs_mat[row_index, col_Index] = 0 self.dict_trans_state = dict_trans_state self.subs_mat = subs_mat # Transform sequences of states into sequences of transitions. y_int_ext = np.insert(y_int, 0, -1, axis=1) y_tran_index = np.zeros_like(y_int) y_tran = [] for i in range(y_int.shape[1]): y_tran.append( list(zip(y_int_ext[:, i], y_int_ext[:, i + 1]))) for i in range(y_int.shape[0]): for j in range(y_int.shape[1]): y_tran_index[i, j] = dict_trans_state[y_tran[j][i]] self._om_dist(y_tran_index) else: self._om_dist(y_int) def _om_pair_dist(self, seq1, seq2): ''' Method for calculating the optimal matching distance between a pair of sequences given a substitution cost matrix and an indel cost. Arguments --------- seq1 : array (t1, ), the first sequence seq2 : array (t2, ), the second sequence Returns ------- D : array (t2+1, t1+1), score matrix: D[i+1,j+1] is the best score for aligning the substring, seq1[0:j] and seq2[0:i], and D[t2+1, t1+1] (or D[-1,-1]) is the global optimal score. ''' t1 = len(seq1) t2 = len(seq2) D = np.zeros((t2 + 1, t1 + 1)) for j in range(1, t1 + 1): D[0, j] = self.indel * j for i in range(1, t2 + 1): D[i, 0] = self.indel * i for i in range(1, t2 + 1): for j in range(1, t1 + 1): gaps = D[i, j - 1] + self.indel gapt = D[i - 1, j] + self.indel match = D[i - 1, j - 1] + self.subs_mat[seq1[j - 1], seq2[i - 1]] D[i, j] = min(match, gaps, gapt) return D def _om_dist(self, y_int): ''' Method for calculating optimal matching distances between all sequence pairs. Arguments --------- y_int : array Encoded longitudinal data ready for optimal matching. Note ---- This method is optimized to calculate the distance between unique sequences only in order to save computation time. 
''' y_str = [] for i in y_int: y_str.append(''.join(list(map(str, i)))) moves_str, counts = np.unique(y_str, axis=0, return_counts=True) uni_num = len(moves_str) dict_move_index = dict(zip(list(moves_str), range(uni_num))) # moves, counts = np.unique(y_int, axis=0, return_counts=True) y_int_uni = [] for i in moves_str: y_int_uni.append(list(map(int, i))) uni_seq_dis_mat = np.zeros((uni_num,uni_num)) for pair in combinations(range(uni_num), 2): seq1 = y_int_uni[pair[0]] seq2 = y_int_uni[pair[1]] uni_seq_dis_mat[pair[0], pair[1]] = self._om_pair_dist(seq1, seq2)[-1, -1] uni_seq_dis_mat = uni_seq_dis_mat + uni_seq_dis_mat.transpose() seq_dis_mat = np.zeros((self.n, self.n)) for pair in combinations(range(self.n), 2): seq1 = y_str[pair[0]] seq2 = y_str[pair[1]] seq_dis_mat[pair[0], pair[1]] = uni_seq_dis_mat[dict_move_index[seq1], dict_move_index[ seq2]] self.seq_dis_mat = seq_dis_mat + seq_dis_mat.transpose() PKi{Nt@ @ geosnap/analyze/incs.py""" Indicators of Neighborhood Change """ from collections import defaultdict import numpy as np def _labels_to_neighborhoods(labels): """Convert a list of labels to neighborhoods dictionary Parameters ----------- labels: list of neighborhood labels Returns ------- neighborhoods: dictionary key is the label for each neighborhood, value is the list of area indexes defining that neighborhood Examples -------- >>> labels = [1,1,1,2,2,3] >>> neighborhoods = _labels_to_neighborhoods(labels) >>> neighborhoods[1] [0, 1, 2] >>> neighborhoods[2] [3, 4] >>> neighborhoods[3] [5] """ neighborhoods = defaultdict(list) for i, label in enumerate(labels): neighborhoods[label].append(i) return neighborhoods def linc(labels_sequence): """Local Indicator of Neighborhood Change Arguments --------- labels_sequence: sequence of neighborhood labels (n,t) n areas in t periods first element is a list of neighborhood labels per area in period 0, second element is a list of neighborhood labels per area in period 1, and so on for all T periods. Returns ------- lincs: array local indicator of neighborhood change over all periods Notes ----- The local indicator of neighborhood change defined here allows for singleton neighborhoods (i.e., neighborhoods composed of a single primitive area such as a tract or block.). This is in contrast to the initial implementation in :cite:`Rey_2011` which prohibited singletons. Examples -------- Time period 0 has the city defined as four neighborhoods on 10 tracts: >>> labels_0 = [1, 1, 1, 1, 2, 2, 3, 3, 3, 4] Time period 1 in the same city, with slight change in composition of the four neighborhoods >>> labels_1 = [1, 1, 1, 1, 1, 2, 3, 3, 3, 4] >>> res = linc([labels_0, labels_1]) >>> res[4] 1.0 >>> res[1] 0.25 >>> res[7] 0.0 >>> res[-1] 0.0 And, in period 2, no change >>> labels_2 = [1, 1, 1, 1, 1, 2, 3, 3, 3, 4] >>> res = linc([labels_1, labels_2]) >>> res[0] 0.0 We can pass more than two time periods, and get a "time-wise global linc" for each unit >>> res = linc([labels_0, labels_1, labels_2]) >>> res[0] 0.25 """ ltn = _labels_to_neighborhoods neighborhood_sequences = [ltn(labels) for labels in labels_sequence] ns = neighborhood_sequences n_areas = len(labels_sequence[0]) lincs = np.zeros((n_areas,)) T = len(labels_sequence) for i in range(n_areas): neighbors = [] for t in range(T): neighbors.append(set(ns[t][labels_sequence[t][i]])) intersection = set.intersection(*neighbors) union = set.union(*neighbors) n_union = len(union) if n_union == 1: # singleton at all points in time lincs[i] = 0. 
else: lincs[i] = 1.0 - ((len(intersection)-1)/(n_union-1)) return lincs

geosnap/data/.gitignore

ltdb/* geolytics/* __pycache__/* ltdb.parquet.gzip ncdb.parquet.gzip ltdb.parquet ncdb.parquet

geosnap/data/README.md

# Data

geosnap's `data` module can ingest data from existing longitudinal databases like the Geolytics [Neighborhood Change Database](http://geolytics.com/USCensus,Neighborhood-Change-Database-1970-2000,Products.asp) and Brown University's [Longitudinal Tract Database](https://s4.ad.brown.edu/projects/diversity/researcher/bridging.htm), and it can download original survey and geospatial data directly from the US Census. To facilitate multiple analyses, geosnap provides functions to cache longitudinal databases to disk. Once they are registered with geosnap, these databases can be queried rapidly to create datasets for analyzing neighborhood dynamics at any scale.

## Importing External Databases

### Longitudinal Tract Database (LTDB)

The [Longitudinal Tract Database (LTDB)](https://s4.ad.brown.edu/projects/diversity/Researcher/LTDB.htm) is a freely available dataset developed by researchers at Brown University that provides census data harmonized to 2010 boundaries. To import LTDB data into geosnap, proceed with the following:

1. Download the raw data from the LTDB [downloads page](https://s4.ad.brown.edu/projects/diversity/Researcher/LTBDDload/Default.aspx). Note that to construct the entire database you will need two archives: one containing the sample variables, and another containing the "full count" variables.
    - Use the dropdown menu called **select file type** and choose "full"; in the dropdown called **select a year**, choose "All Years"
    - Click the button "Download Standard Data Files"
    - Repeat the process, this time selecting "sample" in the **select file type** menu and "All years" in the **select a year** dropdown
2. Note the location of the two zip archives you downloaded. By default they are called
    - `LTDB_Std_All_Sample.zip` and
    - `LTDB_Std_All_fullcount.zip`
3. Start ipython/jupyter, import geosnap, and call the `read_ltdb` function with the paths of the two zip archives you downloaded from the LTDB project page:

```python
from geosnap.data import read_ltdb

# if the archives were in my downloads folder, the paths might be something like this
sample = "~/downloads/LTDB_Std_All_Sample.zip"
full = "~/downloads/LTDB_Std_All_fullcount.zip"
read_ltdb(sample=sample, fullcount=full)
```

The reader will extract the necessary data from the archives, calculate some additional variables, and store the database as an Apache Parquet file. It will also return a pandas DataFrame if you want to get started right away or if you want to inspect the variables.

### Geolytics Neighborhood Change Database

1. Open the Geolytics application
2. Choose "New Request": ![Choose "New Request"](geolytics/geolytics_interface1.PNG)
3. Select CSV or DBF
4. Make the following selections:
    - **year**: all years in 2010 boundaries
    - **area**: all census tracts in the entire United States
    - **counts**: [right click] Check All Sibling Nodes ![](geolytics/geolytics_interface2.PNG)
5. Click `Run Report`
6. Note the name and location of the CSV you created
7. Start ipython/jupyter, import geosnap, and call the `read_ncdb` function with the path of the CSV:

```python
from geosnap.data import read_ncdb

ncdb = "geolytics_full.csv"
read_ncdb(ncdb)
```
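If you want to confirm that a database was registered, the cached tables are re-exposed through the `db` container defined in `geosnap/data/data.py` (and exported from `geosnap.data`). The sketch below is a minimal check under the assumption that the import above finished without errors; it relies on the convention in `data.py` that a database which has not been imported is stored as an empty string.

```python
from geosnap.data import db

# registered databases are cached as long-form pandas DataFrames;
# an un-imported database is represented by an empty string
if len(db.ncdb):
    print(db.ncdb.head())
else:
    print("NCDB has not been registered yet")
```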
## Metropolitan Area Boundaries

Since a common use-case is analyzing neighborhood dynamics at the metropolitan scale, geosnap makes available a set of core-based statistical area (CBSA) geographic [boundaries](ftp://ftp2.census.gov/geo/tiger/TIGER2018/CBSA/tl_2018_us_cbsa.zip) that can be passed to `geosnap.data.Dataset` as a [set of] clipping feature(s) to quickly generate metro-scale extracts. The boundaries are provided as a GeoDataFrame available under the `geosnap.metros` attribute

```python
In [1]: from geosnap import metros

In [2]: metros.head()
Out[2]:
   GEOID              NAME                        NAMELSAD                                           geometry
0  40340     Rochester, MN        Rochester, MN Metro Area  POLYGON ((-92.67871699999999 44.195516, -92.67...
1  39580       Raleigh, NC          Raleigh, NC Metro Area  POLYGON ((-78.546414 36.021826, -78.5464059999...
2  39660    Rapid City, SD       Rapid City, SD Metro Area  POLYGON ((-103.452453 44.140772, -103.452465 4...
3  40380     Rochester, NY        Rochester, NY Metro Area  POLYGON ((-77.99728999999999 43.132981, -77.99...
4  39700  Raymondville, TX    Raymondville, TX Micro Area   POLYGON ((-97.872384 26.433535, -97.875276 26....
```

## Creating Datasets for Analysis

To perform neighborhood analyses, geosnap provides the `Dataset` class which stores information about the spatial boundaries and social composition of a study area. Creating a set of neighborhoods is as simple as instantiating the Dataset class with a location filter and a source database. The location filter can be either a `geopandas.GeoDataFrame` that defines the total extent of a study area (such as an MSA), or a list of state and county FIPS codes.

To use a boundary:

```python
import geopandas as gpd
from geosnap import metros
from geosnap.data import Dataset
import libpysal

# read in a geodataframe of Virginia and instantiate the Dataset class with it
va = gpd.read_file(libpysal.examples.get_path('virginia.shp'))
virginia = Dataset(name='Virginia', source='ltdb', boundary=va)

# To use a metropolitan boundary, first select the appropriate area of interest with pandas conventions
dc_metro = metros[metros.NAME.str.startswith('Washington-Arlington')]
wash_dc = Dataset(name='Washington DC Metro', source='ltdb', boundary=dc_metro)
```

To use a list of FIPS:

```python
from geosnap.data import Dataset

# Maryland's fips code is 24, Baltimore City is 510 and Baltimore County is 005
baltimore = Dataset(name='Baltimore', source='ltdb', states='24', counties=['005', '510'])
```

geosnap/data/__init__.py

from .data import Community, metros, read_ltdb, read_ncdb, dictionary, db
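For orientation, the names exported above can be combined into a short end-to-end sketch. This is illustrative rather than canonical: it uses the `Community` class that `geosnap/data/data.py` defines (the README examples above refer to it as `Dataset`), the Baltimore FIPS codes from the README, and `n_total_pop`, a variable name referenced in `data.py`; the columns actually available depend on which database you imported.

```python
from geosnap.data import Community

# Maryland is state FIPS '24'; Baltimore City and Baltimore County are
# counties '510' and '005' (the same codes used in the README example)
baltimore = Community(source='ltdb', statefips='24',
                      countyfips=['005', '510'], name='Baltimore')

baltimore.census.head()                          # long-form attribute table
baltimore.plot(column='n_total_pop', year=2010)  # quick choropleth
```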
geosnap/data/cbsas.parquet (binary Apache Parquet file; raw contents omitted). Columns recorded in the file's pandas schema: CBSA Code, Metropolitan Division Code, CSA Code, CBSA Title, Metropolitan/Micropolitan Statistical Area, Metropolitan Division Title, CSA Title, County/County Equivalent, State Name, FIPS State Code, FIPS County Code, Central/Outlying County, stcofips.
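As a rough sketch of how this file is consumed (mirroring the `_cbsa` lookup in `data.py` below), the parquet can be read with pandas and a CBSA code resolved to the five-digit state+county FIPS codes of its member counties. The path and the CBSA code here are placeholders, not values taken from the file.

```python
import pandas as pd

# data.py reads this file at import time into `_cbsa`; when a Community is
# built with `cbsafips`, the member counties are looked up roughly like this
cbsa = pd.read_parquet("geosnap/data/cbsas.parquet")   # placeholder path
member_counties = cbsa[cbsa['CBSA Code'] == '47900']['stcofips'].tolist()
print(member_counties)  # '47900' is only an example CBSA code
```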
"metadata": null}, {"name": "stcofips", "field_name": "stcofips", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": null, "field_name": "__index_level_0__", "pandas_type": "int64", "numpy_type": "int64", "metadata": null}], "pandas_version": "0.23.4"}parquet-cpp version 1.4.0PAR1PKrNceJJgeosnap/data/data.py"""Tools for creating and manipulating neighborhood datasets.""" import os import zipfile from warnings import warn import matplotlib.pyplot as plt import pandas as pd import quilt import sys sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from util import adjust_inflation, convert_gdf try: from quilt.data.spatialucr import census except ImportError: warn("Fetching data. This should only happen once") quilt.install("spatialucr/census") quilt.install("spatialucr/census_cartographic") from quilt.data.spatialucr import census try: from quilt.data.geosnap_data import data_store except ImportError: quilt.build("geosnap_data/data_store") from quilt.data.geosnap_data import data_store class Bunch(dict): """A dict with attribute-access.""" def __getattr__(self, key): try: return self.__getitem__(key) except KeyError: raise AttributeError(key) def __setattr__(self, key, value): self.__setitem__(key, value) def __dir__(self): return self.keys() _package_directory = os.path.dirname(os.path.abspath(__file__)) _cbsa = pd.read_parquet(os.path.join(_package_directory, 'cbsas.parquet')) dictionary = pd.read_csv(os.path.join(_package_directory, "variables.csv")) states = census.states() counties = census.counties() tracts = census.tracts_2010 metros = convert_gdf(census.msas()) def _db_checker(database): try: if database == 'ltdb': df = data_store.ltdb() else: df = data_store.ncdb() except AttributeError: df = '' return df #: A dict containing tabular data available to geosnap db = Bunch(census_90=census.variables_1990(), census_00=census.variables_2000(), ltdb=_db_checker('ltdb'), ncdb=_db_checker('ncdb') ) # LTDB importer def read_ltdb(sample, fullcount): """ Read & store data from Brown's Longitudinal Tract Database (LTDB). 
Parameters ---------- sample : str file path of the zip file containing the standard Sample CSV files downloaded from https://s4.ad.brown.edu/projects/diversity/Researcher/LTBDDload/Default.aspx fullcount: str file path of the zip file containing the standard Fullcount CSV files downloaded from https://s4.ad.brown.edu/projects/diversity/Researcher/LTBDDload/Default.aspx Returns ------- pandas.DataFrame """ sample_zip = zipfile.ZipFile(sample) fullcount_zip = zipfile.ZipFile(fullcount) def _ltdb_reader(path, file, year, dropcols=None): df = pd.read_csv( path.open(file), na_values=["", " ", 99999, -999], converters={ 0: str, "placefp10": str }, low_memory=False, encoding="latin1", ) if dropcols: df.drop(dropcols, axis=1, inplace=True) df.columns = df.columns.str.lower() names = df.columns.values.tolist() names[0] = "geoid" newlist = [] # ignoring the first 4 columns, remove year suffix from column names for name in names[4:]: newlist.append(name[:-2]) colnames = names[:4] + newlist df.columns = colnames # prepend a 0 when FIPS is too short df["geoid"] = df["geoid"].str.rjust(11, "0") df.set_index("geoid", inplace=True) df["year"] = year inflate_cols = ["mhmval", "mrent", "incpc", "hinc", "hincw", "hincb", "hinch", "hinca"] inflate_available = list(set(df.columns).intersection(set( inflate_cols))) if len(inflate_available): # try: df = adjust_inflation(df, inflate_available, year) # except KeyError: # half the dfs don't have these variables # pass return df # read in Brown's LTDB data, both the sample and fullcount files for each # year population, housing units & occupied housing units appear in both # "sample" and "fullcount" files-- currently drop sample and keep fullcount sample70 = _ltdb_reader( sample_zip, "ltdb_std_all_sample/ltdb_std_1970_sample.csv", dropcols=["POP70SP1", "HU70SP", "OHU70SP"], year=1970, ) fullcount70 = _ltdb_reader( fullcount_zip, "LTDB_Std_1970_fullcount.csv", year=1970) sample80 = _ltdb_reader( sample_zip, "ltdb_std_all_sample/ltdb_std_1980_sample.csv", dropcols=["pop80sf3", "pop80sf4", "hu80sp", "ohu80sp"], year=1980, ) fullcount80 = _ltdb_reader( fullcount_zip, "LTDB_Std_1980_fullcount.csv", year=1980) sample90 = _ltdb_reader( sample_zip, "ltdb_std_all_sample/ltdb_std_1990_sample.csv", dropcols=["POP90SF3", "POP90SF4", "HU90SP", "OHU90SP"], year=1990, ) fullcount90 = _ltdb_reader( fullcount_zip, "LTDB_Std_1990_fullcount.csv", year=1990) sample00 = _ltdb_reader( sample_zip, "ltdb_std_all_sample/ltdb_std_2000_sample.csv", dropcols=["POP00SF3", "HU00SP", "OHU00SP"], year=2000, ) fullcount00 = _ltdb_reader( fullcount_zip, "LTDB_Std_2000_fullcount.csv", year=2000) sample10 = _ltdb_reader( sample_zip, "ltdb_std_all_sample/ltdb_std_2010_sample.csv", year=2010) # join the sample and fullcount variables into a single df for the year ltdb_1970 = sample70.drop(columns=['year']).join( fullcount70.iloc[:, 7:], how="left") ltdb_1980 = sample80.drop(columns=['year']).join( fullcount80.iloc[:, 7:], how="left") ltdb_1990 = sample90.drop(columns=['year']).join( fullcount90.iloc[:, 7:], how="left") ltdb_2000 = sample00.drop(columns=['year']).join( fullcount00.iloc[:, 7:], how="left") ltdb_2010 = sample10 df = pd.concat( [ltdb_1970, ltdb_1980, ltdb_1990, ltdb_2000, ltdb_2010], sort=True) renamer = dict( zip(dictionary['ltdb'].tolist(), dictionary['variable'].tolist())) df.rename(renamer, axis="columns", inplace=True) # compute additional variables from lookup table for row in dictionary['formula'].dropna().tolist(): df.eval(row, inplace=True) keeps = 
df.columns[df.columns.isin(dictionary['variable'].tolist() + ['year'])] df = df[keeps] data_store._set(['ltdb'], df) quilt.build("geosnap_data/data_store", data_store) def read_ncdb(filepath): """ Read & store data from Geolytics's Neighborhood Change Database. Parameters ---------- filepath : str location of the input CSV file extracted from your Geolytics DVD Returns ------- pandas.DataFrame """ ncdb_vars = dictionary["ncdb"].dropna()[1:].values names = [] for name in ncdb_vars: for suffix in ['7', '8', '9', '0', '1', '2']: names.append(name + suffix) names.append('GEO2010') c = pd.read_csv(filepath, nrows=1).columns c = pd.Series(c.values) keep = [] for i, col in c.items(): for name in names: if col.startswith(name): keep.append(col) df = pd.read_csv( filepath, usecols=keep, engine='c', na_values=["", " ", 99999, -999], converters={ "GEO2010": str, "COUNTY": str, "COUSUB": str, "DIVISION": str, "REGION": str, "STATE": str, }, ) cols = df.columns fixed = [] for col in cols: if col.endswith("D"): fixed.append("D" + col[:-1]) elif col.endswith("N"): fixed.append("N" + col[:-1]) elif col.endswith("1A"): fixed.append(col[:-2] + "2") orig = [] for col in cols: if col.endswith("D"): orig.append(col) elif col.endswith("N"): orig.append(col) elif col.endswith("1A"): orig.append(col) renamer = dict(zip(orig, fixed)) df.rename(renamer, axis="columns", inplace=True) df = df[df.columns[df.columns.isin(names)]] df = pd.wide_to_long( df, stubnames=ncdb_vars, i="GEO2010", j="year", suffix="(7|8|9|0|1|2)").reset_index() df["year"] = df["year"].replace({ 7: 1970, 8: 1980, 9: 1990, 0: 2000, 1: 2010, 2: 2010 }) df = df.groupby(["GEO2010", "year"]).first() mapper = dict(zip(dictionary.ncdb, dictionary.variable)) df.reset_index(inplace=True) df = df.rename(mapper, axis="columns") df = df.set_index("geoid") for row in dictionary['formula'].dropna().tolist(): try: df.eval(row, inplace=True) except: warn('Unable to compute ' + str(row)) df = df.round(0) keeps = df.columns[df.columns.isin(dictionary['variable'].tolist() + ['year'])] df = df[keeps] df = df.loc[df.n_total_pop != 0] data_store._set(['ncdb'], df) quilt.build("geosnap_data/data_store", data_store) # TODO NHGIS reader class Community(object): """Spatial and tabular data for a collection of "neighborhoods". A community is a collection of "neighborhoods" represented by spatial boundaries (e.g. census tracts, or blocks in the US), and tabular data which describe the composition of each neighborhood (e.g. data from surveys, sensors, or geocoded misc.). A Community can be large (e.g. a metropolitan region), or small (e.g. a handfull of census tracts) and may have data pertaining to multiple discrete points in time. Parameters ---------- name : str name or title of dataset. source : str database from which to query attribute data. Must of one of ['ltdb', 'ncdb', 'census', 'external']. statefips : list-like list of two-digit State FIPS codes that define a study region. These will be used to select tracts or blocks that fall within the region. countyfips : list-like list of three-digit County FIPS codes that define a study region. These will be used to select tracts or blocks that fall within the region. cbsafips : str CBSA fips code that defines a study region. This is used to select tracts or blocks that fall within the metropolitan region add_indices : list-like list of additional indices that should be included in the region. 
This is likely a list of additional tracts that are relevant to the study area but do not fall inside the passed boundary boundary : GeoDataFrame A GeoDataFrame that defines the extent of the boundary in question. If a boundary is passed, it will be used to clip the tracts or blocks that fall within it and the state and county lists will be ignored Attributes ---------- census : Pandas DataFrame long-form dataframe containing attribute variables for each unit of analysis. name : str name or title of dataset boundary : GeoDataFrame outer boundary of the study area tracts GeoDataFrame containing tract boundaries counties GeoDataFrame containing County boundaries states GeoDataFrame containing State boundaries """ def __init__(self, source, statefips=None, countyfips=None, cbsafips=None, add_indices=None, boundary=None, name=''): """Instantiate a Community.""" # If a boundary is passed, use it to clip out the appropriate tracts tracts = census.tracts_2010().copy() tracts.columns = tracts.columns.str.lower() self.name = name self.states = states.copy() self.tracts = tracts.copy() self.cbsa = metros.copy()[metros.copy().geoid == cbsafips] self.counties = counties.copy() if boundary is not None: self.tracts = convert_gdf(self.tracts) self.boundary = boundary if boundary.crs != self.tracts.crs: if not boundary.crs: raise('Boundary must have a CRS to ensure valid spatial \ selection') self.tracts = self.tracts.to_crs(boundary.crs) self.tracts = self.tracts[self.tracts.representative_point() .within(self.boundary.unary_union)] self.counties = convert_gdf(self.counties[counties.geoid.isin( self.tracts.geoid.str[0:5])]) self.states = convert_gdf(self.states[states.geoid.isin( self.tracts.geoid.str[0:2])]) self.counties = self.counties.to_crs(boundary.crs) self.states = self.states.to_crs(boundary.crs) # If county and state lists are passed, use them to filter # based on geoid else: assert statefips or countyfips or cbsafips or add_indices statelist = [] if isinstance(statefips, (list, )): statelist.extend(statefips) else: statelist.append(statefips) countylist = [] if isinstance(countyfips, (list, )): countylist.extend(countyfips) else: countylist.append(countyfips) geo_filter = {'state': statelist, 'county': countylist} fips = [] for state in geo_filter['state']: if countyfips is not None: for county in geo_filter['county']: fips.append(state + county) else: fips.append(state) self.states = self.states[states.geoid.isin(statelist)] if countyfips is not None: self.counties = self.counties[self.counties.geoid.str[:5].isin( fips)] self.tracts = self.tracts[self.tracts.geoid.str[:5].isin(fips)] else: self.counties = self.counties[self.counties.geoid.str[:2].isin( fips)] self.tracts = self.tracts[self.tracts.geoid.str[:2].isin(fips)] self.tracts = convert_gdf(self.tracts) self.counties = convert_gdf(self.counties) self.states = convert_gdf(self.states) if source in ['ltdb', 'ncdb']: _df = _db_checker(source) if len(_df) == 0: raise ValueError("Unable to locate {source} data. 
Please import the database with the `read_{source}` function".format(source=source)) elif source == "external": _df = data else: raise ValueError( "source must be one of 'ltdb', 'ncdb', 'census', 'external'") if cbsafips: if not add_indices: add_indices = [] add_indices += _cbsa[_cbsa['CBSA Code'] == cbsafips][ 'stcofips'].tolist() if add_indices: for index in add_indices: self.tracts = self.tracts.append( convert_gdf(tracts[tracts.geoid.str.startswith(index)])) self.counties = self.counties.append( convert_gdf(counties[counties.geoid.str.startswith( index[0:5])])) self.tracts = self.tracts[~self.tracts.geoid.duplicated(keep='first')] self.counties = self.counties[ ~self.counties.geoid.duplicated(keep='first')] self.census = _df[_df.index.isin(self.tracts.geoid)] def plot(self, column=None, year=2010, ax=None, plot_counties=True, title=None, **kwargs): """Conveniently plot a choropleth of the Community. Parameters ---------- column : str The column to be plotted (the default is None). year : str The decennial census year to be plotted (the default is 2010). ax : type matplotlib.axes on which to plot. plot_counties : bool Whether the plot should include county boundaries (the default is True). title: str Title of figure passed to matplotlib.pyplot.title() **kwargs Returns ------- type Description of returned object. """ assert column, "You must choose a column to plot" colname = '%s' % column if ax is not None: ax = ax else: fig, ax = plt.subplots(figsize=(15, 15)) if colname.startswith('n_'): colname = colname[1:] elif colname.startswith('p_'): colname = colname[1:] colname = colname + ' (%)' colname = colname.replace("_", " ") colname = colname.title() if title: plt.title(title, fontsize=20) else: if self.name: plt.title( self.name + " " + str(year) + '\n' + colname, fontsize=20) else: plt.title(colname + " " + str(year), fontsize=20) plt.axis("off") ax.set_aspect("equal") plotme = self.tracts.merge( self.census[self.census.year == year], left_on="geoid", right_index=True) plotme = plotme.dropna(subset=[column]) plotme.plot(column=column, alpha=0.8, ax=ax, **kwargs) if plot_counties is True: self.counties.plot( edgecolor="#5c5353", linewidth=0.8, facecolor="none", ax=ax) return ax def to_crs(self, crs=None, epsg=None, inplace=False): """Transform all geometries to a new coordinate reference system. Parameters ---------- crs : dict or str Output projection parameters as string or in dictionary form. epsg : int EPSG code specifying output projection. inplace : bool, optional, default: False Whether to return a new GeoDataFrame or do the transformation in place. 
""" if inplace: self.tracts = self.tracts self.counties = self.counties self.states = self.states else: self.tracts = self.tracts.copy() self.counties = self.counties.copy() self.states = self.states.copy() self.tracts = self.tracts.to_crs(crs=crs, epsg=epsg) self.states = self.states.to_crs(crs=crs, epsg=epsg) self.counties = self.counties.to_crs(crs=crs, epsg=epsg) if not inplace: return self PKJ/N LCCgeosnap/data/variables.csvvariable,label,formula,ltdb,ncdb,census_1990_form,census_1990_table_column,census_2000_form,census_2000_table_column,acs,category,notes geoid,FIPS code,,geoid,GEO2010,,,,,,, n_mexican_pop,persons of Mexican parentage or ancestry,,mex,MEXIC,SF1,P0090001,SF1,PCT011004,B03001_004E,Ethnicity & Immigration, n_cuban_pop,persons of Cuban parentage or ancestry,,cuban,CUBAN,SF1,P0090004,SF1,PCT011006,B03001_006E,Ethnicity & Immigration, n_puerto_rican_pop,persons of Puerto Rican parentage or ancestry,,pr,PRICAN,SF1,P0090003,SF1,PCT011005,B03001_005E,Ethnicity & Immigration, n_russian_pop,persons of Russian/USSR parentage or ancestry,,ruanc,,SF3,P0330022,SF3,PCT016064+PCT016053+PCT016052+PCT016037,B04001_064E,Ethnicity & Immigration,ruancXX (page 17 of LTDB codebook) suggests that USSR is only selected for 1970. I gather you're aggregating soviet countries individually? 1990 doesn't seem to have USSR or several of its constituents n_italian_pop,persons of Italian parentage or ancestry,,itanc,,SF3,P0330016,SF3,PCT016051,B04001_051E,Ethnicity & Immigration, n_german_pop,persons of German parentage or ancestry,,geanc,,SF3,P0330012,SF3,PCT016042,B04001_042E,Ethnicity & Immigration, n_irish_pop,persons of Irish parentage or ancestry,,iranc,,SF3,P0330015,SF3,PCT016049,B04001_049E,Ethnicity & Immigration, n_scandaniavian_pop,persons of Scandinavian parentage/ancestry,,scanc,,,,SF3,PCT016059+PCT016039+PCT016033+PCT016090,B04001_065E,Ethnicity & Immigration,"scanXX (page 18 of LTDB codebook) suggests dedicated nationalities are used in 1990 and 2000. This is despite there being a scandinavian category in 2000 SF3 (PCT016065); [ek]: similarly, the ACS lists both the scandanavian category *and* the individual country nationalities?" 
n_total_pop_sample,total population from sample-based data,,dfb,,,,,,,Ethnicity & Immigration,LTDB suggests 1980 only n_foreign_born_pop,foreign-born,,fb,FORBORN,SF3,P0360001:10,SF3,P021013,B05002_013E,Ethnicity & Immigration, n_recent_immigrant_pop,recent immigrants (within the past 10 years),,n10imm,,SF3,P0360001:04,SF3,P023002,B05005_007E,Ethnicity & Immigration, n_naturalized_pop,naturalized foreign-born,,nat,FORBCZN,SF3,P0370005,SF3,P021014,B05002_014E,Ethnicity & Immigration, n_age_5_older,persons 5 years and over,,ag5up,,SF3,P0130004:31,SF3,P019001,B16001_001E,Ethnicity & Immigration, n_other_language,persons who speak language other than English at home,,olang,,SF3,P0310002:26,SF3,P019001 - (P019025+P019003+P019047),B16001_001E - B16002_002E,Ethnicity & Immigration,Construct census 2000 count by subtraction from P019001 n_limited_english,persons who speak English not well,,lep,,SF3,P0280004+P0280007+P0280010+P0280014+P0280017+P0280020+P0280024+P0280027+P0280030,SF3,P019022+P019023+P019029+P019013+P019012+P019017+P019018+P019007+P019008+P019061+P019062+P019067+P019066+P019052+P019051+P019057+P019056+P019040+P019045+P019044+P019030+P019039+P019035+P019034,DP02_0113E,Ethnicity & Immigration,"[ljw] cant tell if this includes ""speak other Languages"" as a catchall or if that is a pre-crosstab" n_russian_born_pop,persons who were born in Russia/ USSR,,rufb,,,,SF3,PCT019026,B05006_040E,Ethnicity & Immigration,"[ek] this is STF4 in 1990, so not available from the API" n_italian_born_pop,persons who were born in Italy,,itfb,,,,SF3,PCT019016,B05006_023E,Ethnicity & Immigration,"[ek] this is STF4 in 1990, so not available from the API" n_german_born_pop,persons who were born in Germany,,gefb,,,,SF3,PCT019011,B05006_017E,Ethnicity & Immigration,"[ek] this is STF4 in 1990, so not available from the API" n_irish_born_pop,persons who were born in Ireland,,irfb,,,,SF3,PCT019005,B05006_008E,Ethnicity & Immigration,"[ek] this is STF4 in 1990, so not available from the API" n_scandaniavian__born_pop,persons who were born in Scandinavian Countries,,scfb,,,,SF3,PCT019006+PCT019007,B05006_009M+B05006_010E+B05006_011E+B05006_012M,Ethnicity & Immigration,"[ek] this is STF4 in 1990, so not available from the API" p_mexican_pop,percentage of persons of Mexican parentage or ancestry,p_mexican_pop=n_mexican_pop / n_total_pop*100,pmex,,,,,,,Ethnicity & Immigration, p_cuban_pop,percentage of persons of Cuban parentage or ancestry,p_cuban_pop=n_cuban_pop / n_total_pop*100,pcuban,,,,,,,Ethnicity & Immigration, p_puerto_rican_pop,percentage of persons of Puerto Rican parentage or ancestry,p_puerto_rican_pop=n_puerto_rican_pop / n_total_pop*100,ppr,,,,,,,Ethnicity & Immigration, p_russian_pop,percentage of persons of Russian/USSR parentage or ancestry,p_russian_pop=n_russian_pop / n_total_pop*100,pruanc,,,,,,,Ethnicity & Immigration, p_italian_pop,percentage of persons of Italian parentage or ancestry,p_italian_pop=n_italian_pop / n_total_pop*100,pitanc,,,,,,,Ethnicity & Immigration, p_german_pop,percentage of persons of German parentage or ancestry,p_german_pop=n_german_pop / n_total_pop*100,pgeanc,,,,,,,Ethnicity & Immigration, p_irish_pop,percentage of persons of Irish parentage or ancestry,p_irish_pop=n_irish_pop / n_total_pop*100,piranc,,,,,,,Ethnicity & Immigration, p_scandanavian_pop,percentage of persons of Scandinavian parentage/ancestry,p_scandanavian_pop=n_scandaniavian_pop / n_total_pop*100,pscanc,,,,,,,Ethnicity & Immigration, p_foreign_born_pop,percentage of 
foreign-born,p_foreign_born_pop=n_foreign_born_pop / n_total_pop*100,pfb,SHRFOR,,,,,,Ethnicity & Immigration, p_recent_immigrant_pop,percentage of recent immigrants (within the past 10 years),p_recent_immigrant_pop=n_recent_immigrant_pop / n_total_pop*100,p10imm,,,,,,,Ethnicity & Immigration, p_naturalized_pop,percentage of naturalized foreign-born,p_naturalized_pop=n_naturalized_pop / n_total_pop*100,pnat,,,,,,,Ethnicity & Immigration, p_other_language,percentage of persons who speak language other than English at home,p_other_language=n_other_language / n_total_pop*100,polang,,,,,,,Ethnicity & Immigration, p_limited_english,percentage of persons who speak English not well,p_limited_english=n_limited_english / n_total_pop*100,plep,,,,,,,Ethnicity & Immigration, p_russian_born_pop,percentage of persons who were born in Russia/ USSR,p_russian_born_pop=n_russian_born_pop / n_total_pop*100,prufb,,,,,,,Ethnicity & Immigration, p_italian_born_pop,percentage of persons who were born in Italy,p_italian_born_pop=n_italian_born_pop / n_total_pop*100,pitfb,,,,,,,Ethnicity & Immigration, p_german_born_pop,percentage of persons who were born in Germany,p_german_born_pop=n_german_born_pop / n_total_pop*100,pgefb,,,,,,,Ethnicity & Immigration, p_irish_born_pop,percentage of persons who were born in Ireland,p_irish_born_pop=n_irish_born_pop / n_total_pop*100,pirfb,,,,,,,Ethnicity & Immigration, p_scandanavian_born_pop,percentage of persons who were born in Scandinavian Countries,p_scandanavian_born_pop=n_scandaniavian__born_pop / n_total_pop*100,pscfb,,,,,,,Ethnicity & Immigration, n_total_housing_units,housing units,,hu,TOTHSUN,SF1,H0010001,SF1,H001001,B25002_001E,"Housing, Age, & Marital Status", n_vacant_housing_units,vacant housing units,,vac,VACHU,SF1,H0020002,SF1,H003003,B25002_003E,"Housing, Age, & Marital Status",divide by B25002_001E for vacancy rate n_occupied_housing_units,occupied housing units,,ohu,OCCHU,SF1,H0020001,SF1,H003002,B25002_002E,"Housing, Age, & Marital Status", n_owner_occupied_housing_units,owner-occupied housing units,,own,OWNOCC,SF1,H0030001,SF1,H004002,B25003_002E,"Housing, Age, & Marital Status", n_renter_occupied_housing_units,renter-occupied housing units,,rent,RNTOCC,SF1,H0030002,SF1,H004003,B25003_003E,"Housing, Age, & Marital Status", n_housing_units_multiunit_structures_denom,housing units denom,n_housing_units_multiunit_structures_denom=n_total_housing_units,dmulti,,,,,,B25024_001E,"Housing, Age, & Marital Status", n_housing_units_multiunit_structures,housing units in multi-unit structures,,multi,,,,SF3,H030004+H030005+H030006+H030007+H030008+H030009,B25024_004E+B25024_005E+B25024_006E+B25024_007E+B25024_008E+B25024_009E,"Housing, Age, & Marital Status",[ljw] LTDB is unclear as to the relevant computed column from SF3-H030*. 
Recorded columns here are all stationary housing units (not mobile home (H030010) or RV/Van/Boat (H030011) n_total_housing_units_sample,housing units in sample-based data,n_total_housing_units_sample=n_total_housing_units,husp,,,,,,B25024_001E,"Housing, Age, & Marital Status", median_home_value,Median home value,,mhmval,MDVALHS,SF3,H061A001,SF3,H085001,B25077_001E,"Housing, Age, & Marital Status", median_contract_rent,Median monthly contract rent,,mrent,MDGRENT,SF3,H043A001,SF3,H056001,B25058_001E,"Housing, Age, & Marital Status", n_structures_30_old,structures built more than 30 years ago,,h30old,,SF3,H0250005+H0250006+H0250007+H0250008,SF3,H034010+H034009+H034008+H034007,,"Housing, Age, & Marital Status", n_occupied_housing_units_sample,occupied housing units in sample-based data,,ohusp,,SF3,H0040001,SF3,H006001,B25003_001E,"Housing, Age, & Marital Status", n_household_recent_move,household heads moved into unit less than 10 years ago,,h10yrs,,SF3,H0250006+H0250007+H0250008,SF3,H038003+H038004+H038005+H038010+H038011+H038012,,"Housing, Age, & Marital Status", n_persons_under_18,persons age 17 years and under,,a18und,NCHILD,SF3,P0130012+P0130011+P0130010+P0130009+P0130008+P0130007+P0130006+P0130005+P0130004+P0130003+P0130002+P0130001,SF1,P012003+P012004+P012005+P012006+P012027+P012028+P012029+P012030,B01001_003E+B01001_004E+B01001_005E+B01001_006E+B01001_027E+B01001_028E+B01001_029E+B01001_030E,"Housing, Age, & Marital Status", n_persons_over_60,persons age 60 years and over,,a60up,,SF3,P0130025+P0130026+P0130027+P0130028+P0130029+P0130030+P0130031,SF1,P012018:025+ P012042:049,B01001_018E+B01001_019E+B01001_020E+B01001_021E+B01001_022E+B01001_023E+B01001_024E+B01001_025E+B01001_042E+B01001_043E+B01001_044E+B01001_045E+B01001_046E+B01001_047E+B01001_048E+B01001_049E,"Housing, Age, & Marital Status", n_persons_over_75,persons age 75 years and over,,a75up,,SF3,P0130029+P0130030+P0130031,SF1,P012023:25+ P012047:49,B01001_047E+B01001_048E+B01001_049E+B01001_023E+B01001_024E+B01001_025E,"Housing, Age, & Marital Status", n_persons_over_15,population 15 years and over,,ag15up,PERS15P,SF3,P0130010+P0130011+P0130012+P0130013+P0130014+P0130015+P0130016+P0130017+P0130018+P0130019+P0130020+P0130021+P0130022+P0130023+P0130024+P0130025+P0130026+P0130027+P0130028+P0130029+P0130030+P0130031,SF3,P018001,B12001_001E,"Housing, Age, & Marital Status", n_persons_over_25,population 25 years and over,,ag25up,,SF3,P0130018+P0130019+P0130020+P0130021+P0130022+P0130023+P0130024+P0130025+P0130026+P0130027+P0130028+P0130029+P0130030+P0130031,SF3,P037001,B15002_001E,"Housing, Age, & Marital Status",denominator for educational attainment n_married,currently married (excluding separated),,mar,MMARSPP,SF3,P0250001+P0250002,SF3,(P018004+P018013) - (P018007+P018016),B12001_005E,"Housing, Age, & Marital Status", n_widowed_divorced,"widowed, divorced, and separated",,wds,,SF3,P0270005+P0270011+P0270006+P0270012,SF3,P018007+P018009+P018010+P018016+P018018+P018019,B12001_007E+B12001_009E+B12001_010E+B12001_016E+B12001_018E+B12001_019E,"Housing, Age, & Marital Status", n_total_families,total families,,family,FAMSUB,SF3,P0040001,SF1,P031001,B17010_001E,"Housing, Age, & Marital Status",denominator for calculating % female-headed families w/ children n_female_headed_families,female-headed families with children,,fhh,NFFH,SF3,P0190005,SF1,P035016,B17010_017E,"Housing, Age, & Marital Status",numerator for calculating % female-headed families w/ children p_vacant_housing_units,percentage of vacant housing 
units,p_vacant_housing_units=n_vacant_housing_units / n_total_housing_units*100,pvac,,,,,,,"Housing, Age, & Marital Status", p_owner_occupied_units,percentage of owner-occupied housing units,p_owner_occupied_units=n_occupied_housing_units / n_total_housing_units*100,pown,,,,,,,"Housing, Age, & Marital Status", p_housing_units_multiunit_structures,percentage of housing units in multi-unit structures,p_housing_units_multiunit_structures=n_housing_units_multiunit_structures / n_housing_units_multiunit_structures_denom*100,pmulti,,,,,,,"Housing, Age, & Marital Status", p_structures_30_old,percentage of structures built more than 30 years ago,p_structures_30_old=n_structures_30_old / n_housing_units_multiunit_structures_denom*100,p30old,,,,,,,"Housing, Age, & Marital Status", p_household_recent_move,percentage of household heads moved into unit less than 10 years ago,p_household_recent_move=n_household_recent_move / n_total_households*100,p10yrs,,,,,,,"Housing, Age, & Marital Status", p_persons_under_18,percentage of persons age 17 years and under,p_persons_under_18=n_persons_under_18 / n_total_pop*100,p18und,,,,,,,"Housing, Age, & Marital Status", p_persons_over_60,percentage of persons age 60 years and over,p_persons_over_60=n_persons_over_60 / n_total_pop*100,p60up,,,,,,,"Housing, Age, & Marital Status", p_persons_over_75,percentage of persons age 75 years and over,p_persons_over_75=n_persons_over_75 / n_total_pop*100,p75up,,,,,,,"Housing, Age, & Marital Status", p_married,percent currently married (excluding separated),p_married=n_married / n_persons_over_15*100,pmar,,,,,,,"Housing, Age, & Marital Status", p_widowed_divorced,"percent widowed, divorced, and separated",p_widowed_divorced=n_widowed_divorced / n_persons_over_15*100,pwds,,,,,,,"Housing, Age, & Marital Status",should denom be families or individuals? p_female_headed_families,percentage of female-headed families with children,p_female_headed_families=n_female_headed_families / n_total_families*100,pfhh,,,,,,,"Housing, Age, & Marital Status",should denom be families or individuals? n_white_persons,persons of white race,,white,NSHRWHT,SF1,P0100001+P0100006,SF1,P003003,,Race & Age,"[ljw] inferring here, documentation suggests this is only available for 1970, but white alone (regardless of hispanic) gives this count? [ek] I think the question about hispanic ethnicity was added in 1980, so this is the best they can do to measdure the 'white alone' construct in 1970 " n_nonhisp_white_persons,"persons of white race, not Hispanic origin",,nhwht,NSHRNHW,SF1,P0100001,SF1,P004005,B03002_003E,Race & Age,"[ljw] I read this as P004005 (not hispanic white alone), not P003003 (white alone)" n_black_persons,persons of black race,,black,NSHRBLK,SF1,P0100007+P0100002,SF1,P003004,,Race & Age, n_nonhisp_black_persons,"persons of black race, not Hispanic origin",,nhblk,NSHRNHB,SF1,P0100002,SF1,P004006,B03002_004E,Race & Age, n_hispanic_persons,persons of Hispanic origin,,hisp,NSHRHSP,SF1,P0080001,SF1,P004002,B03002_012E,Race & Age, n_native_persons,"persons of Native American race, not Hispanic origin",,ntv,NSHRAMI,SF1,P0070003,SF1,P004007,B03002_005E,Race & Age,is this nonhispanic? n_hawaiian_persons,"persons of Hawaiian race, not Hispanic origin",,haw,NSHRHIP,SF1,P0070017,SF1,P004009,B02001 _006E,Race & Age,is this nonhispanic? 
n_asian_indian_persons,persons of Asian Indian race,,india,,SF1,P0070009,SF1,PCT007002,B03002_006E+B03002_007E,Race & Age, n_chinese_persons,persons of Chinese race,,china,,SF1,P0070006,SF1,PCT007005+PCT007015,B02006_005E+B02006_015E,Race & Age, n_filipino_persons,persons of Filipino race,,filip,,SF1,P0070007,SF1,PCT007006,B02006_006E,Race & Age, n_japanese_persons,persons of Japanese race,,japan,,SF1,P0070008,SF1,PCT007009,B02006_009E,Race & Age, n_korean_persons,persons of Korean race,,korea,,SF1,P0070010,SF1,PCT007010,B02006_010E,Race & Age, n_asian_persons,persons of Asian race,,asian,,SF1,P0060004,SF1,P004008,B03002_006E+B03002_007E,Race & Age,for 1990 this is Asian and PI n_vietnamese_persons,persons of Vietnamese race,,viet,,SF1,P0070011,SF1,PCT007017,B02006_017E,Race & Age, n_white_age_distribution,white population with known age distribution,,agewht,,SF1,P0120001:62,SF1,PCT012I001,B01001H_001E,Race & Age, n_white_under_15,0-15 years old of white race,,a15wht,,SF1,P0120001:09+P0120032:40,SF1,PCT012I003:018+PCT012I108:122,B01001H_003E+B01001H_004E+B01001H_005E+B01001H_018E+B01001H_019E+B01001H_020E,Race & Age, n_white_over_60,60 years and older of white race,,a60wht,,SF1,P0120025:31+P0120056:62,SF1,PCT012I063:105+PCT012I167:209,,Race & Age,is this nonhispanic? n_white_over_65,65 years and older of non-Hispanic whites,,a65wht,,SF1,P0120027:31+P0120058:62,SF1,PCT012I068:105+PCT012I172:209,B01001H_014E+B01001H_015E+B01001H_016E+01001H_029E+01001H_030E+01001H_031E,Race & Age, n_black_age_distribution,black population with known age distribution,,ageblk,,SF1,P0120063:0124,SF1,PCT012J001,B01001B_001E,Race & Age, n_black_under_15,0-15 years old of black race,,a15blk,,SF1,P0120063:71+P0120094:102,SF1,PCT012J003:018+PCT012J108:122,B01001B_003E+B01001B_004E+B01001B_005E+B01001B_018E+B01001B_019E_B01001B_020E,Race & Age, n_black_over_60,60 years and older of black race,,a60blk,,SF1,P0120087:93+P0120118:124,SF1,PCT012J063:105+PCT012J167:209,,Race & Age, n_black_over_65,65 years and older of black race,,a65blk,,SF1,P0120089:93+P0120120:124,SF1,PCT012J068:105+PCT012J172:209,B01001B_014E+B01001B_015E+B01001B_016E+B01001B_029E+B01001B_030E+B01001B_031E,Race & Age, n_hispanic_age_distribution,Hispanic population with known age distribution,,agehsp,,SF1,P0130001:62,SF1,PCT012H001,B01001I_001E,Race & Age, n_hispanic_under_15,"0-15 years old, persons of Hispanic origins",,a15hsp,,SF1,P0130001:09+P0130032:40,SF1,PCT012H003:018+PCT012H108:122,B01001I_003E+B01001I_004E+B01001I_005E+B01001I_018E+B01001I_019E+B01001I_020E,Race & Age, n_hispanic_over_60,"60 years and older, persons of Hispanic origins",,a60hsp,,SF1,P0130025:31+P0130056:62,SF1,PCT012H063:105+PCT012H167:209,,Race & Age, n_hispanic_over_65,"65 years and older, persons of Hispanic origins",,a65hsp,,SF1,P0130027:31+P0130058:62,SF1,PCT012H068:105+PCT012H172:209,B01001I_014E+B01001I_015E+B01001I_016E+B01001I_029E+B01001I_030E+B01001I_031E,Race & Age, n_native_age_distribution,Native American population with known age distribution,,agentv,,SF1,P0120125:186,SF1,PCT012K001,B01001C_001E,Race & Age, n_native_under_15,0-15 years old of Native American race,,a15ntv,,SF1,P0120125:133 +P0120156:164,SF1,PCT012K003:018+PCT012K108:122,B01001C_003E+B01001C_004E+B01001C_005E+B01001C_018E+B01001C_019E+B01001C_020E,Race & Age, n_native_over_60,60 years and older of Native American race,,a60ntv,,SF1,P0120149:155+P0120180:186,SF1,PCT012K063:105+PCT012K167:209,,Race & Age, n_native_over_65,65 years and older of Native American 
race,,a65ntv,,SF1,P0120151:155+P0120182:186,SF1,PCT012K068:105+PCT012K172:209,B01001C_014E+B01001C_015E+B01001C_016E+B01001C_029E+B01001C_030E+B01001C_031E,Race & Age, n_asian_age_distribution,Asian and Pacific Islander population with known age distribution,,ageasn,,SF1,P0120187:248,SF1,PCT012L001+PCT012M001,B01001D_001E+B01001E_001E,Race & Age, n_asian_under_15,0-15 years old of Asians and Pacific Islanders,,a15asn,,SF1,P0120187:195+P0120218:226,SF1,PCT012M003:018+PCT012M108:122+PCT012L003:018+PCT012L108:122,B01001D_003E+B01001D_004E+B01001D_005E+B01001D_018E+B01001D_019E+B01001D_020E+B01001E_003E+B01001E_004E+B01001E_005E+B01001E_018E+B01001E_019E+B01001E_020E,Race & Age, n_asian_over_60,60 years and older of Asians and Pacific Islanders,,a60asn,,,,SF1,PCT012M063:105+PCT012M167:209,,Race & Age, n_asian_over_65,65 years and older of Asians and Pacific Islanders,,a65asn,,,,SF1,PCT012M068:105+PCT012M172:209+PCT012L068:105+PCT012L172:209,B01001D_014E+B01001D_015E+B01001D_016E+B01001E_014E+B01001E_015E+B01001E_016E+B01001E_029E+B01001E_030E+B01001E_031E+B01001D_029E+B01001D_030E+B01001D_031E,Race & Age, p_white_persons,percentage of persons of white race,,pwhite,,,,,,,Race & Age, p_black_persons,percentage of persons of black race,,pnhwht,SHRNHW,,,,,,Race & Age, p_nonhisp_white_persons,"percentage of persons of white race, not Hispanic origin",p_nonhisp_white_persons=n_nonhisp_white_persons / n_total_pop*100,pblack,,,,,,,Race & Age, p_nonhisp_black_persons,"percentage of persons of black race, not Hispanic origin",p_nonhisp_black_persons=n_nonhisp_black_persons / n_total_pop*100,pnhblk,SHRNHB,,,,,,Race & Age, p_hispanic_persons,percentage of persons of Hispanic origin,p_hispanic_persons=n_hispanic_persons / n_total_pop*100,phisp,SHRHSP,,,,,,Race & Age, p_native_persons,percentage of persons of Native American race,p_native_persons=n_native_persons / n_total_pop*100,pntv,SHRNHI,,,,,,Race & Age, p_asian_persons,percentage of persons of Asian race (and Pacific Islander),p_asian_persons=n_asian_persons / n_total_pop*100,pasian,SHRNHR,,,,,,Race & Age, p_hawaiian_persons,percentage of persons of Hawaiian race,p_hawaiian_persons=n_hawaiian_persons / n_total_pop*100,phaw,SHRNHH,,,,,,Race & Age, p_asian_indian_persons,percentage of persons of Asian Indian race,p_asian_indian_persons=n_asian_indian_persons / n_total_pop*100,pindia,,,,,,,Race & Age, p_chinese_persons,percentage of persons of Chinese race,p_chinese_persons=n_chinese_persons / n_total_pop*100,pchina,,,,,,,Race & Age, p_filipino_persons,percentage of persons of Filipino race,p_filipino_persons=n_filipino_persons / n_total_pop*100,pfilip,,,,,,,Race & Age, p_japanese_persons,percentage of persons of Japanese race,p_japanese_persons=n_japanese_persons / n_total_pop*100,pjapan,,,,,,,Race & Age, p_korean_persons,percentage of persons of Korean race,p_korean_persons=n_korean_persons / n_total_pop*100,pkorea,,,,,,,Race & Age, p_vietnamese_persons,percentage of persons of Vietnamese race,p_vietnamese_persons=n_vietnamese_persons / n_total_pop*100,pviet,,,,,,,Race & Age, p_white_under_15,percentage of 0-15 years old of white race,p_white_under_15=n_white_under_15 / n_total_pop*100,p15wht,,,,,,,Race & Age, p_white_over_60,percentage of 60 years and older of white race,p_white_over_60=n_white_over_60 / n_total_pop*100,p60wht,,,,,,,Race & Age, p_white_over_65,percentage of 65 years and older of non-Hispanic whites,p_white_over_65=n_white_over_65 / n_total_pop*100,p65wht,,,,,,,Race & Age, p_black_under_15,percentage of 0-15 years old of black 
race,p_black_under_15=n_black_under_15 / n_total_pop*100,p15blk,,,,,,,Race & Age, p_black_over_60,percentage of 60 years and older of black race,p_black_over_60=n_black_over_60 / n_total_pop*100,p60blk,,,,,,,Race & Age, p_black_over_65,percentage of 65 years and older of black race,p_black_over_65=n_black_over_65 / n_total_pop*100,p65blk,,,,,,,Race & Age, p_hispanic_under_15,"percentage of 0-15 years old, persons of Hispanic origins",p_hispanic_under_15=n_hispanic_under_15 / n_total_pop*100,p15hsp,,,,,,,Race & Age, p_hispanic_over_60,"percentage of 60 years and older, persons of Hispanic origins",p_hispanic_over_60=n_hispanic_over_60 / n_total_pop*100,p60hsp,,,,,,,Race & Age, p_hispanic_over_65,"percentage of 65 years and older, persons of Hispanic origins",p_hispanic_over_65=n_hispanic_over_65 / n_total_pop*100,p65hsp,,,,,,,Race & Age, p_native_under_15,percentage of 0-15 years old of Native American race,p_native_under_15=n_native_under_15 / n_total_pop*100,p15ntv,,,,,,,Race & Age, p_native_over_60,percentage of 60 years and older of Native American race,p_native_over_60=n_native_over_60 / n_total_pop*100,p60ntv,,,,,,,Race & Age, p_native_over_65,percentage of 65 years and older of Native American race,p_native_over_65=n_native_over_65 / n_total_pop*100,p65ntv,,,,,,,Race & Age, p_asian_under_15,percentage of 0-15 years old of Asians and Pacific Islanders,p_asian_under_15=n_asian_under_15 / n_total_pop*100,p15asn,,,,,,,Race & Age, p_asian_over_60,percentage of 60 years and older of Asians and Pacific Islanders,p_asian_over_60=n_asian_over_60 / n_total_pop*100,p60asn,,,,,,,Race & Age, p_asian_over_65,percentage of 65 years and older of Asians and Pacific Islanders,p_asian_over_65=n_asian_over_65 / n_total_pop*100,p65asn,,,,,,,Race & Age, n_female_over_16,"females 16 years and over, except in armed forces",,dflabf,DCFEPR,SF3,P0700006+P0700007+P0700008,SF3,P043012,,Socioeconomic Status, n_female_labor_force,females in labor force,,flabf,FEPR,SF3,P0700006+P0700007,SF3,P043010,,Socioeconomic Status, n_labor_force,civilian labor force,,clf,,SF3,P0700002+P0700003+P0700006+P0700007,SF3,P043005+P043012,,Socioeconomic Status, n_unemployed_persons,unemployed persons,,unemp,,SF3,P0700003+P0700007,SF3,P043007+P043014,C24010_001E-(B23001_007E+B23001_014E+B23001_021E+B23001_028E+B23001_035E+B23001_042E+B23001_049E+B23001_049E+B23001_056E+B23001_063E+B23001_070E+B23001_093E+B23001_100E+B23001_107E+B23001_114E+B23001_121E+B23001_128E+B23001_135E+B23001_142E+B23001_149E+B23001_156E),Socioeconomic Status, n_employed_over_16,employed persons 16 years and over,,empclf,EMPMT,SF3,P0700002+P0700006,SF3,P049001,B23001_007E+B23001_014E+B23001_021E+B23001_028E+B23001_035E+B23001_042E+B23001_049E+B23001_049E+B23001_056E+B23001_063E+B23001_070E+B23001_093E+B23001_100E+B23001_107E+B23001_114E+B23001_121E+B23001_128E+B23001_135E+B23001_142E+B23001_149E+B23001_156E,Socioeconomic Status, n_employed_professional,professional employees (by occupations),,prof,DLFRAT,SF3,P0780001+P0780002,SF3,P049017+P049044,,Socioeconomic Status, n_employed_manufacturing,manufacturing employees (by industries),,manuf,PRFEMP,SF3,P0770004+P0770005,SF3,P049007+P049034,,Socioeconomic Status, n_employed_self_employed,self-employed,,semp,,SF3,P0790006,SF3,P051012+P051023+P051033+P051044+P051055+P051065,,Socioeconomic Status, n_civilians_over_16,civilian population 16 years and over,,ag16cv,,SF3,P0640002+P0640003+P0640005+P0640006+P0640008+P0640009 +P0640011+P0640012,SF3,P043005+P043012,C24010_001E,Socioeconomic Status, 
n_civilians_over_18,civilian population 18 years and over,,ag18cv,,,,SF3,P039005+P039010+P039016+P039021,,Socioeconomic Status, n_veterans,veterans,,vet,,SF3,P0640002+P0640005+P0640008+P0640011,SF3,P039006+P039011+P039017+P039022,B21001_002E,Socioeconomic Status, n_civilians_16_64,civilian non-institutionalized persons 16-64 years old,,cni16u,,SF3,P0640002+P0640003+P0640008+P0640009,SF3,P042001,,Socioeconomic Status, n_disabled,disabled,,dis,,SF3,P0680001+P0680002+P0680005+P0680006+P0680009+P0680010+P0680013+P0680014,SF3,P042004+P042007+P042014+P042021+P042024+P042028+P042031+P042038+P042045+P042048,,Socioeconomic Status, median_household_income,Median household income,,hinc,MDHHY,SF3,P080A001,SF3,P053001,B19013_001E,Socioeconomic Status,"in 2015 dollars, will need inflation adjustment for timeseries" n_total_households,total households in sample-based data,,hh,NUMHHS,SF3,P0050001,SF3,P010001,B19001_001E,Socioeconomic Status, median_income_whitehh,Median household income for whites,,hincw,,,,SF3,P152A001,B19013H_001E,Socioeconomic Status,"[ek] the 1990 table noted in the LTDB docs only has ranges, not median (e.g. P0820001)" n_white_households,total white households in sample-based data,,hhw,,SF3,P0080001,SF3,P146A001,B19001H_001E,Socioeconomic Status, median_income_blackhh,Median household income for blacks,,hincb,,,,SF3,P152B001,B19013B_001E,Socioeconomic Status,"[ek] the 1990 table noted in the LTDB docs only has ranges, not median (e.g. P0820001)" n_black_households,total black households in sample-based data,,hhb,,SF3,P0080002,SF3,P146B001,B19001B_001E,Socioeconomic Status, median_income_hispanichh,Median household income for Hispanics,,hinch,,,,SF3,P152H001,B19013I_001E,Socioeconomic Status,"[ek] the 1990 table noted in the LTDB docs only has ranges, not median (e.g. P0820001)" n_hispanic_households,total Hispanic households in sample-based data,,hhh,,SF3,P0210001:07,SF3,P146H001,B19001I_001E,Socioeconomic Status,"[ek] the 1990 value is calculated differently than the LTDB codebook, because the their reference (P0830001) doesnt include hispanic origin" median_income_asianhh,Median household income for Asians and Pacific Islanders,,hinca,,,,SF3,P152D001,,Socioeconomic Status,"[ek] the 1990 and 2010 tables noted in the LTDB docs only have ranges, not median (e.g. P0820001 for 1990 and B19001F_012E for 2010)" n_asian_households,total Asian/Pacific Islander households in sample-based data,,hha,,SF3,P0080004,SF3,P152D001+P152E001,B19001D_001E+B19001E_001E,Socioeconomic Status,"unclear how to calculate, since this is only provided as asian or as PI for 2000. 
Column recorded is asian+pacific islander" per_capita_income,Per capita income,,incpc,,SF3,P114A001,SF3,P082001,B19301_001E,Socioeconomic Status, n_poverty_determined_persons,persons for whom poverty status is determined,,dpov,DPOVRAT,SF3,P1170001:24,SF3,P087001,B17001_001E,Socioeconomic Status,denominator for calculating poverty rate n_poverty_persons,persons in poverty,,npov,NPOVRAT,SF3,P1170013:24,SF3,P087002,B17001_002E,Socioeconomic Status,numerator for calculating poverty rate n_poverty_over_65,persons 65 years and older in poverty,,n65pov,NELDPOO,SF3,P1170023+P1170024,SF3,P087008+P087009,B17001_015E+B17001_016E+B17001_029E+B17001_030E,Socioeconomic Status, n_poverty_determined_families,families for whom poverty status is determined,,dfmpov,,SF3,P1230001:24,SF3,P090001,B17001_001E,Socioeconomic Status, n_poverty_families_children,families with children in poverty,,nfmpov,,,P1230013:15+P1230017:19+P1230021:23,SF3,P090002,B17010_004E+B17010_011E+B17010_017E,Socioeconomic Status, n_poverty_determined_white,white persons for whom poverty status is determined,,dwpov,DWHTPR,SF3,P1190001:07+P1190036:42,SF3,P159A001,B17001A_001E,Socioeconomic Status,is this nonhispanic? Recorded white (regardless). White (not hispanic) is P159I n_poverty_white,whites in poverty,,nwpov,NWHTPR,SF3,P1190036:42,SF3,P159A002,B17001A_002E,Socioeconomic Status, n_poverty_determined_black,black persons for whom poverty status is determined,,dbpov,DBLKPR,SF3,P1190008:14+P1190043:49,SF3,P159B001,B17001B_001E,Socioeconomic Status, n_poverty_black,blacks in poverty,,nbpov,NBLKPR,SF3,P1190043:49,SF3,P159B002,B17001B_002E,Socioeconomic Status, n_poverty_determined_hispanic,Hispanics for whom poverty status is determined,,dhpov,DHISPR,,,SF3,P159H001,B17020I_001E,Socioeconomic Status,[ek] it's not clear to me how LTDB computed values from this variable https://api.census.gov/data/1990/sf3/variables/P1200001.json n_poverty_hispanic,Hispanics in poverty,,nhpov,NHISPR,,,SF3,P159H002,B17020I_002E,Socioeconomic Status, n_poverty_determined_native,Native American for whom poverty status is determined,,dnapov,DINDPR,SF3,P1190015:21+P1190050:56,SF3,P159C001,B17020C_001E,Socioeconomic Status, n_poverty_native,Native Americans in poverty,,nnapov,INDPR,SF3,P1190050:56,SF3,P159C002,B17020C_002E,Socioeconomic Status, n_poverty_determined_asian,Asians and Pacific Islanders for whom poverty status is determined,,dapov,DASNPR,SF3,P1190022:28+P1190058:63,SF3,P159D001+P159E001,B17020E_001E,Socioeconomic Status,"asian alone is D, hawaiian and pac islander is E" n_poverty_asian,Asians and Pacific Islanders in poverty,,napov,NASNPR,SF3,P1190058:63,SF3,P159D002+P159E002,B17020E_002E,Socioeconomic Status, n_edu_college_greater,persons with at least a four-year college degree,,col,EDUC16,SF3,P0570006+P0570007,SF3,P037015:18+P037032:35,B15002_015E+B15002_016E+B15002_017E+B15002_018E+B15002_032E+B15002_033E+B15002_034E+B15002_035E,Socioeconomic Status, n_edu_hs_less,persons with high school degree or less,,hs,EDUC12,SF3,P0570001+P0570002+P0570003,SF3,P037003:011+P037020:028,B15002_003E+B15002_004E+B15002_005E+B15002_006E+B15002_007E+B15002_008E+B15002_009E+B15002_010E+B15002_020E+B15002_021E+B15002_022E+B15002_023E+B15002_024E+B15002_025E+B15002_026E+B15002_027E,Socioeconomic Status, p_edu_hs_less,percentage of persons with high school degree or less,p_edu_hs_less=n_edu_hs_less / n_persons_over_25*100,phs,,,,,,,Socioeconomic Status, p_edu_college_greater,percentage of persons with at least a four-year college 
degree,p_edu_college_greater=n_edu_college_greater / n_persons_over_25*100,pcol,,,,,,,Socioeconomic Status, p_unemployment_rate,percent unemployed,p_unemployment_rate=n_unemployed_persons / n_labor_force*100,punemp,UNEMPRT,,,,,,Socioeconomic Status, p_female_labor_force,percentage of females in labor force,,pflabf,,,,,,,Socioeconomic Status, p_employed_professional,percentage of professional employees (by occupations),p_employed_professional=n_employed_professional / n_employed_over_16*100,pprof,,,,,,,Socioeconomic Status, p_employed_manufacturing,percentage of manufacturing employees (by industries),p_employed_manufacturing=n_employed_manufacturing / n_employed_over_16*100,pmanuf,,,,,,,Socioeconomic Status, p_employed_self_employed,percentage of self-employed,p_employed_self_employed=n_employed_self_employed / n_employed_over_16*100,psemp,,,,,,,Socioeconomic Status, p_veterans,percentage of veterans,p_veterans=n_veterans / n_total_pop*100,pvet,,,,,,,Socioeconomic Status, p_disabled,percent with disability,p_disabled=n_disabled / n_total_pop*100,pdis,,,,,,,Socioeconomic Status, p_poverty_rate,percent poor,p_poverty_rate=n_poverty_persons / n_poverty_determined_persons*100,ppov,POVRAT,,,,,,Socioeconomic Status, p_poverty_rate_over_65,percentage of 65 years and older in poverty,p_poverty_rate_over_65=n_poverty_over_65 / n_poverty_determined_persons*100,p65pov,ELDPOO,,,,,,Socioeconomic Status, p_poverty_rate_children,percentage of families with children in poverty,p_poverty_rate_children=n_poverty_families_children / n_poverty_determined_families*100,pfmpov,,,,,,,Socioeconomic Status, p_poverty_rate_white,percentage of whites in poverty,p_poverty_rate_white=n_poverty_white / n_poverty_determined_persons*100,pwpov,WHTPR,,,,,,Socioeconomic Status, p_poverty_rate_black,percentage of blacks in poverty,p_poverty_rate_black=n_poverty_black / n_poverty_determined_persons*100,pbpov,BLKPR,,,,,,Socioeconomic Status, p_poverty_rate_hispanic,percentage of Hispanics in poverty,p_poverty_rate_hispanic=n_poverty_hispanic / n_poverty_determined_persons*100,phpov,,,,,,,Socioeconomic Status, p_poverty_rate_native,percentage of Native Americans in poverty,p_poverty_rate_native=n_poverty_native / n_poverty_determined_persons*100,pnapov,,,,,,,Socioeconomic Status, p_poverty_rate_asian,percentage of Asian and Pacific Islanders in poverty,p_poverty_rate_asian=n_poverty_asian / n_poverty_determined_persons*100,papov,RASPR,,,,,,Socioeconomic Status, n_total_pop,total population,,pop,TRCTPOP,SF1,P0010001,SF1,P001001,B01001_001E,total population,PKUqkNVgeosnap/tests/context.pyimport os import sys sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import analyze import data PKrNgeosnap/tests/test_add_data.pyimport context import os path = os.environ['DLPATH'] read_ltdb = context.data.read_ltdb read_ncdb = context.data.read_ncdb def test_read_ltdb(): read_ltdb( sample=path+"/ltdb_sample.zip", fullcount=path+"/ltdb_full.zip", ) from quilt.data.geosnap_data import data_store assert data_store.ltdb().shape == (330388, 192) def test_read_ncdb(): read_ncdb(path+"/ncdb.csv") from quilt.data.geosnap_data import data_store assert data_store.ncdb().shape == (328633, 77) PKykN"Խ geosnap/tests/test_clusters.pyfrom context import analyze, data import os path = os.environ['DLPATH'] if not os.path.exists(os.path.join(os.path.dirname(os.path.abspath(data.__file__)), "ltdb.parquet")): data.read_ltdb( sample=path+"/ltdb_sample.zip", fullcount=path+"/ltdb_full.zip", ) reno = data.Community(source='ltdb', 
cbsafips='39900') columns = ['median_household_income', 'p_poverty_rate', 'p_unemployment_rate'] # Aspatial Clusters def test_gm(): gm = analyze.cluster(reno, columns=columns, method='gaussian_mixture', best_model=True) assert len(gm.census.gaussian_mixture.unique()) > 7 def test_ward(): ward = analyze.cluster(reno, columns=columns, method='ward') assert len(ward.census.ward.unique()) == 6 def test_spectral(): spectral = analyze.cluster(reno, columns=columns, method='spectral') assert len(spectral.census.spectral.unique()) == 6 def test_kmeans(): kmeans = analyze.cluster(reno, columns=columns, method='kmeans') assert len(kmeans.census.kmeans.unique()) == 6 def test_aff_prop(): aff_prop = analyze.cluster(reno, columns=columns, method='affinity_propagation', preference=-100) assert len(aff_prop.census.affinity_propagation.unique()) == 3 def test_hdbscan(): hdbscan = analyze.cluster(reno, columns=columns, method='hdbscan') assert len(hdbscan.census.hdbscan.unique()) > 27 # Spatial Clusters def test_spenc(): spenc = analyze.cluster_spatial(reno, columns=columns, method='spenc') assert len(spenc.census.spenc.unique()) == 7 def test_maxp(): maxp = analyze.cluster_spatial(reno, columns=columns, method='max_p', initial=10) assert len(maxp.census.max_p.unique()) > 9 def test_ward_spatial(): ward_spatial = analyze.cluster_spatial(reno, columns=columns, method='ward_spatial') assert len(ward_spatial.census.ward_spatial.unique()) == 7 def test_skater(): skater = analyze.cluster_spatial(reno, columns=columns, method='skater', n_clusters=10) assert len(skater.census.skater.unique()) == 11 def test_azp(): azp = analyze.cluster_spatial(reno, columns=columns, method='azp') assert len(azp.census.azp.unique()) == 7 PKX+N,~I%geosnap/tests/test_data_boundaries.pyfrom context import data from quilt.data.spatialucr import census from quilt.data.spatialucr import census_cartographic def test_metros(): mets = data.metros assert mets.shape == (945, 4) def test_tracts(): assert census.tracts_1990().shape == (61332, 3) assert census.tracts_2000().shape == (65506, 2) assert census.tracts_2010().shape == (73056, 2) assert census_cartographic.tracts_1990().shape == (61693, 2) assert census_cartographic.tracts_2000().shape == (66688, 2) PKR/NS4ongeosnap/tests/test_datasets.pyfrom context import data import os path = os.environ['DLPATH'] if not os.path.exists(os.path.join(os.path.dirname(os.path.abspath(data.__file__)), "ltdb.parquet")): data.read_ltdb( sample=path+"/ltdb_sample.zip", fullcount=path+"/ltdb_full.zip", ) def test_Community_from_boundary(): dc_bound = data.metros[data.metros.name.str.startswith('Washington-Arlington')] dc_bound = dc_bound.to_crs(epsg=2248) dc = data.Community(boundary=dc_bound, source='ltdb') dc = dc.to_crs(epsg=4326) assert dc.tracts.shape == (1359, 2) assert dc.census.shape == (6560, 192) def test_Community_from_cbsa(): la = data.Community(cbsafips='31080', source='ltdb') assert la.tracts.shape == (2929, 2) assert la.census.shape == (14613, 192) def test_Community_from_stcofips(): mn = data.Community(statefips='27', countyfips=['053', '055'], source='ltdb') assert mn.tracts.shape == (304, 2) assert mn.census.shape == (1515, 192) def test_Community_from_indices(): chi = data.Community(source='ltdb', add_indices=['17031', '17019']) assert chi.tracts.shape == (1362, 2) assert chi.census.shape == (6805, 192) PK+Ngeosnap/tests/test_db.pyfrom context import data def test_db_vars90(): df = data.db.census_90 assert df.shape == (61258, 162) def test_db_vars00(): df = data.db.census_00 assert 
df.shape == (65443, 190) def test_data_dictionary(): df = data.dictionary assert df.shape == (194, 12) PK;nNUgeosnap/tests/test_dynamics.pyfrom context import analyze import numpy as np import pytest Sequence = analyze.dynamics.Sequence def test_Sequence_unequal(): ''' 1. Testing on sequences of unequal lengths. ''' seq1 = 'ACGGTAG' seq2 = 'CCTAAG' seq3 = 'CCTAAGC' # 1.1 substitution cost matrix and indel cost are not given, and will be # generated based on the distance type "interval" seqAna = Sequence([seq1, seq2, seq3], dist_type="interval") subs_mat = np.array([[0., 1., 2., 3.], [1., 0., 1., 2.], [2., 1., 0., 1.], [3., 2., 1., 0.]]) seq_dis_mat = np.array([[ 0., 7., 10.], [ 7., 0., 3.], [10., 3., 0.]]) assert seqAna.k == 4 assert all([a == b for a, b in zip(seqAna.subs_mat.flatten(), subs_mat.flatten())]) assert all([a == b for a, b in zip(seqAna.seq_dis_mat.flatten(), seq_dis_mat.flatten())]) # 1.2 User-defined substitution cost matrix and indel cost subs_mat = np.array([[0, 0.76, 0.29, 0.05], [0.30, 0, 0.40, 0.60], [0.16, 0.61, 0, 0.26], [0.38, 0.20, 0.12, 0]]) indel = subs_mat.max() seqAna = Sequence([seq1, seq2, seq3], subs_mat=subs_mat, indel=indel) seq_dis_mat = np.array([[0. , 1.94, 2.46], [1.94, 0. , 0.76], [2.46, 0.76, 0. ]]) assert all([a == b for a, b in zip(seqAna.seq_dis_mat.flatten(), seq_dis_mat.flatten())]) # 1.3 Calculating "hamming" distance will fail on unequal sequences with pytest.raises(ValueError,): Sequence([seq1, seq2, seq3], dist_type="hamming") def test_Sequence_equal(): ''' 2. Testing on sequences of equal length. ''' seq1 = 'ACGGTAG' seq2 = 'CCTAAGA' seq3 = 'CCTAAGC' # 2.1 Calculating "hamming" distance will not fail on equal sequences seqAna = Sequence([seq1, seq2, seq3], dist_type="hamming") seq_dis_mat = np.array([[0., 6., 6.], [6., 0., 1.], [6., 1., 0.]]) assert all([a == b for a, b in zip(seqAna.seq_dis_mat.flatten(), seq_dis_mat.flatten())]) # 2.2 User-defined substitution cost matrix and indel cost (distance # between different types is always 1 and indel cost is 2 or larger) - # give the same sequence distance matrix as "hamming" distance subs_mat = np.array([[0., 1., 1., 1.], [1., 0., 1., 1.], [1., 1., 0., 1.], [1., 1., 1., 0.]]) indel = 2 seqAna = Sequence([seq1, seq2, seq3], subs_mat=subs_mat, indel=indel) seq_dis_mat = np.array([[0., 6., 6.],[6., 0., 1.],[6., 1., 0.]]) assert all([a == b for a, b in zip(seqAna.seq_dis_mat.flatten(), seq_dis_mat.flatten())]) # 2.3 User-defined substitution cost matrix and indel cost (distance # between different types is always 1 and indel cost is 1) - give a # slightly different sequence distance matrix from "hamming" distance since # insertion and deletion is happening indel = 1 seqAna = Sequence([seq1, seq2, seq3], subs_mat=subs_mat, indel=indel) seq_dis_mat = np.array([[0., 5., 5.],[5., 0., 1.],[5., 1., 0.]]) assert all([a == b for a, b in zip(seqAna.seq_dis_mat.flatten(), seq_dis_mat.flatten())]) PK&)NQuоgeosnap/tests/test_incs.pyfrom context import analyze linc = analyze.incs.linc def test_linc(): labels_0 = [1, 1, 1, 1, 2, 2, 3, 3, 3, 4] labels_1 = [1, 1, 1, 1, 1, 2, 3, 3, 3, 4] res = linc([labels_0, labels_1]) assert res[4] == 1.0 assert res[7] == 0.0 == res[-1] labels_2 = [1, 1, 1, 1, 1, 2, 3, 3, 3, 4] res = linc([labels_1, labels_2]) assert res[0] == 0.0 res = linc([labels_0, labels_1, labels_2]) assert res[0] == 0.25 PKmckN?_`geosnap/util/__init__.pyfrom .util import * PK\'N  geosnap/util/cenpy_fetch.pyimport cenpy import pandas import os import numpy as np filepath = os.path.dirname(__file__) 
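# Illustrative sketch (an addition for clarity, not part of the original module):
# the census column specs recorded in variables.csv use a "start:stop" shorthand
# such as "P012018:025", which process_columns()/normalize_relation() defined
# further below expand into the individual API column names before querying.
# A minimal standalone version of that expansion, under the same assumptions
# (three-digit, zero-padded column suffixes):
def _expand_column_range_example(spec):
    """Expand e.g. 'P012018:025' -> ['P012018', ..., 'P012025'] (sketch only)."""
    start, stop = spec.split(':')
    stem = start[:-3]                     # table stem, e.g. 'P012'
    first, last = int(start[-3:]), int(stop)
    return [stem + str(i).rjust(3, '0') for i in range(first, last + 1)]
# _expand_column_range_example('P012018:025') would yield the eight column
# names P012018 through P012025.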
variable_file = os.path.join(filepath, 'variables.csv') variables = pandas.read_csv(variable_file) c2000sf1 = cenpy.base.Connection( 'DecennialSF11990') c2000sf3 = cenpy.base.Connection( 'DecennialSF31990') by_form = variables.groupby('census_1990_form') column_relations = by_form.census_1990_table_column.agg(list) def fetch(unit='tract', state=None, filter=None): """ use Cenpy to collect the necessary variables from the Census API """ sf1cols = process_columns(column_relations.loc['SF1']) sf3cols = process_columns(column_relations.loc['SF3']) evalcols = [ normalize_relation(rel) for rel in variables['census_1990_table_column'].dropna().tolist() ] varnames = variables.dropna( subset=['census_1990_table_column'])['variable'] evals = [parts[0] + "=" + parts[1] for parts in zip(varnames, evalcols)] _sf1 = cenpy.tools.national_to_tract(c2000sf1, sf1cols, wait_by_county=0.5) #_sf1 = c2000sf1.query(sf1cols, geo_unit=unit, geo_filter=filter) _sf1['geoid'] = _sf1.state + _sf1.county + _sf1.tract _sf3 = cenpy.tools.national_to_tract(c2000sf3, sf3cols, wait_by_county=0.5) #_sf3 = c2000sf3.query(sf3cols, geo_unit=unit, geo_filter=filter) _sf3['geoid'] = _sf3.state + _sf3.county + _sf3.tract df = _sf1.merge(_sf3, on='geoid') df.set_index('geoid', inplace=True) df = df.apply(lambda x: pandas.to_numeric(x, errors='coerce'), axis=1) # compute additional variables from lookup table for row in evals: try: df.eval(row, inplace=True, engine='python') except Exception as e: print(row + ' ' + str(e)) df = df.replace('nan', np.nan) for row in variables['formula'].dropna().tolist(): try: df.eval(row, inplace=True, engine='python') except Exception as e: print(str(row) + ' ' + str(e)) keeps = [col for col in df.columns if col in variables.variable.tolist()] df = df[keeps].round(2) return df def process_columns(input_columns): # prepare by taking all sum-of-columns as lists outcols_processing = [s.replace('+', ',') for s in input_columns] outcols = [] while outcols_processing: # stack col = outcols_processing.pop() col = col.replace('-', ',').replace('(', '').replace(')', '') col = [c.strip() for c in col.split(',')] # get each part if len(col) > 1: # if there are many parts col, *rest = col # put the rest back for r in rest: outcols_processing.insert(0, r) else: col = col[0] if ":" in col: # if a part is a range start, stop = col.split(':') # split the range stem = start[:-3] start = int(start[-3:]) stop = int(stop) # and expand the range cols = [ stem + str(col).rjust(3, '0') for col in range(start, stop + 1) ] outcols.extend(cols) else: outcols.append(col) return outcols def normalize_relation(relation): parts = relation.split('+') if len(parts) == 1: if ':' not in relation: return relation else: relation = parts[0] else: relation = '+'.join([normalize_relation(rel.strip()) for rel in parts]) if ":" in relation: start, stop = relation.split(':') stem = start[:-3] start = int(start[-3:]) stop = int(stop) # and expand the range cols = [ stem + str(col).rjust(3, '0') for col in range(start, stop + 1) ] return '+'.join(cols) return relation df = fetch() df.to_csv('census_1990.csv') PKckNZgeosnap/util/util.pyimport pandas as pd from shapely import wkb, wkt import geopandas as gpd def convert_gdf(df): df = df.copy() df.reset_index(inplace=True, drop=True) if 'wkt' in df.columns.tolist(): df['geometry'] = df.wkt.apply(wkt.loads) df = df.drop(columns=['wkt']) else: df['geometry'] = df.wkb.apply(lambda x: wkb.loads(x, hex=True)) df = df.drop(columns=['wkb']) df = gpd.GeoDataFrame(df) df.crs = {"init": "epsg:4326"} 
return df def adjust_inflation(df, columns, given_year, base_year=2015): """ Adjust currency data for inflation. Parameters ---------- df : DataFrame Dataframe of historical data columns : list-like The columns of the dataframe with currency data given_year: int The year in which the data were collected; e.g. to convert data from the 1990 census to 2015 dollars, this value should be 1990. base_year: int, optional Constant dollar year; e.g. to convert data from the 1990 census to constant 2015 dollars, this value should be 2015. Default is 2015. Returns ------- DataFrame """ # get inflation adjustment table from BLS inflation = pd.read_excel( "https://www.bls.gov/cpi/research-series/allitems.xlsx", skiprows=6) inflation.columns = inflation.columns.str.lower() inflation.columns = inflation.columns.str.strip(".") inflation = inflation.dropna(subset=["year"]) inflator = inflation.groupby('year')['avg'].first().to_dict() inflator[1970] = 63.9 df = df.copy() updated = df[columns].apply(lambda x: x * (inflator[base_year] / inflator[given_year])) df.update(updated) return df
geosnap-0.0.2.dist-info/LICENSE.txt
Copyright (c) 2007-2015, geosnap Developers All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Center for Geospatial Sciences nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
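# --------------------------------------------------------------------------
# Hypothetical end-to-end usage sketch (not part of the packaged sources
# above). It only strings together calls that appear in geosnap/data.py and
# the test modules; the import path is an assumption, and the CBSA code and
# column list are copied from geosnap/tests/test_clusters.py for illustration.
from geosnap import data, analyze

# One-time ingestion of the LTDB archives (file paths are placeholders):
# data.read_ltdb(sample="ltdb_sample.zip", fullcount="ltdb_full.zip")

# Build a Community for a single metro area, cluster its tracts on a few
# variables, and map the resulting labels.
community = data.Community(source='ltdb', cbsafips='39900')
community = analyze.cluster(
    community,
    columns=['median_household_income', 'p_poverty_rate', 'p_unemployment_rate'],
    method='kmeans')
community.plot(column='kmeans', year=2010)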