change_detection/__init__.py

"""Package for detecting change in time-series data."""

__version__ = '0.2.1'


change_detection/change_detection.R

print("Change detection running...")

##### Retrieve arguments from Python command
arguments <- commandArgs(trailingOnly = TRUE)

#rm(list = ls()) #clear the workspace
setwd(arguments[1]) ### Set working directory

########################
####### Load required packages (install needed first time)
#######################
library(caTools)
library(gets) ### main package for break detection - see Pretis, Reade, and Sucarrat, Journal of Stat. Software, in press.

##########################
####### Parameters
#########################
plot_show <- FALSE ### show plots during break detection

#### Break detection calibration
p_alpha <- 0.000001 ## level of significance for the detection of breaks (main calibration choice)

### Computational
parallel <- NULL ### set as integer (= number of cores - 1) if selection should run in parallel (may increase speed for longer time series)
# recommended to leave as NULL unless datasets are very large.

######### Input Data
variable <- "ratio_quantity" ### choose relevant variable name in the sample
data.pick <- read.csv(arguments[2], header=TRUE) # load raw input data

#############################################
pick.rel <- grep(variable, names(data.pick))
month.c <- as.POSIXct(as.numeric(as.character(data.pick$month)), origin="1970-01-01", tz="GMT")
data.pick <- data.frame(month.c, data.pick[,pick.rel])
names.rel <- names(data.pick)[pick.rel]
vars <- length(pick.rel)

##############################################################################
################################################
###################### A: Break Detection Loop
################################################
##############################################################################
result.list <- list()
warn.list <- list() # collect warning messages per series

withCallingHandlers({
  for (i in 1:vars) {
    #print(names.rel[i])
    print(paste(round((i / vars)*100, 1), "%"))
    y <- as.matrix(data.pick[names.rel[i]])

    ###############################################
    ### Main Break Detection Function (tis=TRUE searches for trend breaks) - called from "gets" package
    if (sum(is.na(y)) == 0) { ## if there are no missing values
      islstr.res <- isat(y, t.pval=p_alpha, sis=FALSE, tis=TRUE, iis=FALSE,
                         plot=plot_show, parallel.options=parallel,
                         max.block.size=15)
    } else {
      ### Create missing value indicator
      m <- is.na(y)
      m <- m * 1
      ### Fill in missing values with arbitrary number
      y[is.na(y)] <- -1
      islstr.res <- isat(y, t.pval=p_alpha, sis=FALSE, tis=TRUE, iis=FALSE,
                         plot=plot_show, parallel.options=parallel,
                         max.block.size=12, mxreg=m)
    }
    ##########################################
    result.list[i] <- list(islstr.res)
    names(result.list)[i] <- names.rel[i]
  }
}, warning = function(w) {
  warn.list[[i]] <<- w$message # store the warning (the original wrote to an undefined `results` object)
  invokeRestart("muffleWarning")
}) ### withCallingHandlers closed

#save(result.list, file=arguments[3])
save.image(file=arguments[3])
print("Break detection done")

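For reference, the script above takes three positional arguments: the working directory to setwd() into, the shaped input CSV, and the RData path for save.image(). A minimal sketch of invoking it directly, mirroring run_r_script() in functions.py below; Rscript is assumed to be on PATH and the file names are hypothetical examples:

# A minimal invocation sketch; paths and file names are hypothetical.
import subprocess

working_dir = 'data/my_analysis'         # arguments[1]: setwd() target
input_csv = 'r_input_0.csv'              # arguments[2]: shaped input data
output_rdata = 'r_intermediate_0.RData'  # arguments[3]: save.image() target

subprocess.run(
    ['Rscript', 'change_detection/change_detection.R',
     working_dir, input_csv, output_rdata],
    check=True,  # raise if the R process exits non-zero
)
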
change_detection/functions.py

import os
import subprocess
import time
from multiprocessing import cpu_count, Process
import glob

import pandas as pd
import numpy as np
from ebmdatalab import bq

'''
required R packages:
# zoo
# caTools
# gets
'''

def install_r_packages():
    import rpy2.robjects.packages as rpackages
    from rpy2.robjects.vectors import StrVector
    utils = rpackages.importr('utils')
    utils.chooseCRANmirror(ind=1)
    packnames = ['zoo', 'caTools', 'gets']
    names_to_install = [pkg for pkg in packnames
                        if not rpackages.isinstalled(pkg)]
    if len(names_to_install) > 0:
        utils.install_packages(StrVector(names_to_install))
        print('Installed ' + str(names_to_install))

install_r_packages()

def run_r_script(path):
    command = 'Rscript'
    path2script = os.path.join(os.getcwd(), path)
    cmd = [command, path2script]
    return subprocess.call(cmd)

class ChangeDetection(object):
    '''
    Requires the name of a SQL query file. The file must have the
    suffix ".sql", but the name is passed without it.
    '''
    def __init__(self, name, verbose=False, sample=False, measure=False,
                 direction='both', use_cache=True, csv_name='bq_cache.csv'):
        self.name = name
        self.num_cores = cpu_count() - 1
        self.verbose = verbose
        self.sample = sample
        self.measure = measure
        self.direction = direction
        self.use_cache = use_cache
        self.writing = False
        self.csv_name = csv_name

    def get_working_dir(self, folder):
        folder_name = folder.replace('%', '')
        return os.path.join(os.getcwd(), 'data', folder_name)

    def create_dir(self, dir_path):
        os.makedirs(dir_path, exist_ok=True)
        os.makedirs(os.path.join(dir_path, 'figures'), exist_ok=True)

    def get_measure_list(self):
        q = '''
        SELECT table_id
        FROM ebmdatalab.measures.__TABLES__
        WHERE table_id LIKE "%s"
        ''' % (self.name)
        measure_list = pd.read_gbq(q, project_id='ebmdatalab')
        return measure_list['table_id']

    def get_measure_query(self, measure_name):
        if 'practice' in self.name:
            code_col = 'practice_id'
        elif 'ccg' in self.name:
            code_col = 'pct_id'
        else:
            # previously fell through with code_col undefined (NameError)
            raise ValueError('name must contain "practice" or "ccg" '
                             'to determine the code column')
        q = '''
        SELECT month, %s AS code, numerator, denominator
        FROM ebmdatalab.measures.%s
        ''' % (code_col, measure_name)
        return q

    def get_custom_query(self):
        query = os.path.join(os.getcwd(), self.name + '.sql')
        with open(query) as q:
            return q.read()

    def get_data(self):
        print('Running all queries')
        if self.measure:
            for measure_name in self.measure_list:
                folder_name = os.path.join(self.name, measure_name)
                get_data_dir = self.get_working_dir(folder_name)
                self.create_dir(get_data_dir)
                query = self.get_measure_query(measure_name)
                csv_path = os.path.join(get_data_dir, self.csv_name)
                bq.cached_read(query, csv_path=csv_path,
                               use_cache=self.use_cache)
        else:
            get_data_dir = self.get_working_dir(self.name)
            self.create_dir(get_data_dir)
            query = self.get_custom_query()
            csv_path = os.path.join(get_data_dir, self.csv_name)
            bq.cached_read(query, csv_path=csv_path,
                           use_cache=self.use_cache)
        print('All queries done')
    def shape_dataframe(self):
        '''
        Returns data in a dataframe in the format needed for `r_detect()`.

        Reads `self.csv_name` (default `bq_cache.csv`) from
        `self.working_dir`, waiting for it to appear if `get_data()` is
        still running in a parallel process.
        '''
        csv_path = os.path.join(self.working_dir, self.csv_name)
        while not os.path.exists(csv_path):
            time.sleep(0.5)
        #time.sleep(3)
        input_df = pd.read_csv(csv_path)
        input_df = input_df.sort_values(['code', 'month'])
        input_df['ratio'] = input_df['numerator'] / input_df['denominator']

        ## R script requires this header format:
        input_df['code'] = 'ratio_quantity.' + input_df['code']
        input_df = input_df.set_index(['month', 'code'])

        ## drop small numbers
        #mask = (input_df['denominator']>=5)
        #input_df = input_df.loc[mask]

        ## remove num/denom columns
        input_df = input_df.drop(columns=['numerator', 'denominator'])

        ## unstack
        input_df = input_df.unstack().reset_index(col_level=1)
        input_df.columns = input_df.columns.droplevel()
        input_df['month'] = pd.to_datetime(input_df['month'])
        input_df = input_df.set_index('month')

        ## replace inf and -inf with NaN
        input_df = input_df.replace([np.inf, -np.inf], np.nan)

        ## drop columns with missing values
        #input_df = input_df.dropna(axis=1)  # removed after implementing
                                             # fix to include missing vals

        ## drop columns with all identical values
        cols = input_df.select_dtypes([np.number]).columns
        std = input_df[cols][:-5].std()  # added [:-5] to remove ones with
                                         # a few weird values at the end
        cols_to_drop = std[(std == 0) | (std.isna())].index
        input_df = input_df.drop(cols_to_drop, axis=1)

        ## drop columns with a high proportion of missing values
        mask = input_df.isna().sum() < len(input_df) / 2
        input_df = input_df[input_df.columns[mask]]

        ## date to unix timestamp (for R)
        input_df.index = input_df.index - pd.Timestamp("1970-01-01")
        input_df.index = input_df.index // pd.Timedelta('1s')

        ## Select random sample
        if self.sample:
            input_df = input_df.sample(n=100, random_state=1234, axis=1)
        return input_df

    def run_r_script(self, i, script_name, input_name, output_name, *args):
        '''
        - outputs are reduced by default (a bit faster that way)
        - for debugging purposes use the `verbose` argument
        '''
        ## Define R command
        command = 'Rscript'
        module_folder = os.path.dirname(os.path.realpath(__file__))
        path2script = os.path.join(module_folder, script_name)
        cmd = [command, path2script]

        ## Define arguments to pass to R
        arguments = [self.working_dir, input_name, output_name]
        for arg in args:
            arguments.append(arg)

        ## run the command
        if i == 0:
            if self.verbose:
                return subprocess.Popen(cmd + arguments)
            return subprocess.Popen(cmd + arguments,
                                    stderr=subprocess.DEVNULL)
        return subprocess.Popen(cmd + arguments,
                                stderr=subprocess.DEVNULL,
                                stdout=subprocess.DEVNULL)

    def r_detect(self):
        '''
        Splits the DataFrame into pieces and runs the change detection
        algorithm on a separate process for each piece
        '''
        ## Get data and split
        split_df = np.array_split(self.shape_dataframe(),
                                  self.num_cores, axis=1)

        ## Initiate a separate R process for each sub-DataFrame
        i = 0
        processes = []
        for item in split_df:
            script_name = 'change_detection.R'
            input_name = "r_input_%s.csv" % (i)
            output_name = "r_intermediate_%s.RData" % (i)
            df = pd.DataFrame(item)
            df.to_csv(os.path.join(self.working_dir, input_name))
            process = self.run_r_script(i, script_name,
                                        input_name, output_name)
            processes.append(process)
            i += 1
        for process in processes:
            process.wait()
            assert process.returncode == 0, \
                'Change detection process failed %s' % (process.args)

    def r_extract(self):
        '''
        This R script could technically be combined with the r_detect one,
        but it was easier/more flexible to keep them separate when writing
        '''
        processes = []
        for i in range(0, self.num_cores):
            script_name = 'results_extract.R'
            input_name = "r_intermediate_%s.RData" % (i)
            output_name = "r_output_%s.csv" % (i)
            module_folder = os.path.dirname(os.path.realpath(__file__))
            process = self.run_r_script(i, script_name,
                                        input_name, output_name,
                                        module_folder, self.direction)
            processes.append(process)
        for process in processes:
            process.wait()
            assert process.returncode == 0, \
                'Results extraction process failed %s' % (process.args)
    def concatenate_split_dfs(self):
        files = glob.glob(os.path.join(self.working_dir, 'r_output_*.csv'))
        df_to_concat = (pd.read_csv(f) for f in files)
        df = pd.concat(df_to_concat)
        df = df.drop('Unnamed: 0', axis=1)
        ## strip the 'ratio_quantity.' prefix added in shape_dataframe()
        ## (str.lstrip strips *characters*, not a prefix, so use a regex)
        df['name'] = df['name'].str.replace('^ratio_quantity\\.', '',
                                            regex=True)
        df = df.set_index('name')
        df.to_csv(os.path.join(self.working_dir, 'r_output.csv'))

    def detect_change(self):
        if self.measure:
            for measure_name in self.measure_list:
                folder_name = os.path.join(self.name, measure_name)
                self.working_dir = self.get_working_dir(folder_name)
                out_path = os.path.join(self.working_dir, 'r_output.csv')
                if not os.path.exists(out_path):  # was `~`, which is
                    self.r_detect()               # always truthy on a bool
                    self.r_extract()
                    self.concatenate_split_dfs()
        else:
            self.working_dir = self.get_working_dir(self.name)
            if not os.path.isdir(os.path.join(self.working_dir, 'figures')):
                get_data_dir = self.get_working_dir(self.name)
                self.create_dir(get_data_dir)
            out_path = os.path.join(self.working_dir, 'r_output.csv')
            if not os.path.exists(out_path):
                self.r_detect()
                self.r_extract()
                self.concatenate_split_dfs()

    def clear(self):
        os.system('cls')

    def run(self):
        if self.measure:
            self.measure_list = self.get_measure_list()
        if self.csv_name == 'bq_cache.csv':
            p1 = Process(target=self.get_data)
            p1.start()
        # detect_change() waits for the cached CSVs via shape_dataframe()
        p2 = Process(target=self.detect_change)
        p2.start()

    def concatenate_outputs(self):
        assert self.measure, "Not to be used on single outputs"
        working_dir = self.get_working_dir(self.name)
        folders = os.listdir(working_dir)
        files = []
        for folder in folders:
            file = os.path.join(working_dir, folder, 'r_output.csv')
            files.append(file)
        df_to_concat = (pd.read_csv(f) for f in files)
        df_to_concat = list(df_to_concat)
        for i in range(0, len(folders)):
            df_to_concat[i]['measure'] = folders[i]
        df = pd.concat(df_to_concat)
        df = df.sort_values(['measure', 'name'])
        return df.set_index(['measure', 'name'])

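A minimal usage sketch of the class above. The query name 'my_query' is hypothetical and would refer to a my_query.sql file in the current working directory; measure mode would instead pass a pattern matched against BigQuery measure table names:

# A minimal usage sketch; 'my_query' is a hypothetical query name.
from change_detection.functions import ChangeDetection

cd = ChangeDetection('my_query', verbose=True, direction='down')
cd.run()  # spawns background processes for data fetch and detection

# run() returns immediately; once the background processes finish,
# the combined results are in data/my_query/r_output.csv.
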
change_detection/results_extract.R

print("Extracting and classifying changes")

library(caTools)
library(gets) ### main package for break detection - see Pretis, Reade, and Sucarrat, Journal of Stat. Software, in press.

##### Retrieve arguments from Python command
arguments <- commandArgs(trailingOnly = TRUE)

###################################################################
#######################################
########### B: Extract Trend-Break Results
#######################################
####################################################################
#rm(list = ls()) #clear the workspace
#setwd("C:\\Users\\ajwalker\\Documents\\GitHub\\prescribing_change_metrics\\data\\testing") # for testing only
setwd(arguments[1])

#load("r_intermediate_2.RData")
load(arguments[2])
vars.list <- length(result.list)
### load() restores the workspace saved by change_detection.R, which
### overwrites `arguments`, so re-read the command-line arguments:
arguments <- commandArgs(trailingOnly = TRUE)

#### additional source code for trend functions for analysis
source(file.path(arguments[4], "trend_isat_functions.R"))
#source("C:\\Users\\ajwalker\\Documents\\GitHub\\prescribing_change_metrics\\trend_isat_functions.R")

####################
########## Calibration of Analysis
###################
saveplots_analysis <- TRUE ### save plots of output of analysis
fig_path_tis_analysis <- paste(arguments[1], "/figures/", sep = "") ### set path to store analysis figures
#fig_path_tis_analysis <- "C:\\Users\\ajwalker\\Documents\\GitHub\\prescribing_change_metrics\\data\\testing\\figures\\" ### set path to store analysis figures

###### Timing Measures
known.t <- 0 ### Time of known intervention in the sample, e.g. medication became available as generic at observation t=18
break.t.lim <- .8 ### Proportion offset after negative break

###### Slope Measures
slope.lim <- 0.5 ### Proportion of slope drop for construction of slope measure

#########################
############### to store results
results <- data.frame(name=names.rel)

### Number of Detected Breaks
results$is.nbreak <- NA ### Number of breaks

### Timing Measures
results$is.tfirst <- NA ### First negative break
results$is.tfirst.pknown <- NA ### First negative break after a known intervention date
results$is.tfirst.pknown.offs <- NA ### First negative break after a known intervention date not offset by a XX% increase
results$is.tfirst.offs <- NA ### First negative break not offset by a XX% increase
results$is.tfirst.big <- NA ### steepest break as identified by is.slope.ma

### Slope Measures
results$is.slope.ma <- NA ### Average slope over steepest segment contributing at least XX% of total drop
results$is.slope.ma.prop <- NA ### Average slope as proportion of prior level
results$is.slope.ma.prop.lev <- NA ### Percentage of the total drop the segment used to evaluate the slope makes up

### Level Measures
results$is.intlev.initlev <- NA ### Pre-drop level
results$is.intlev.finallev <- NA ### End level
results$is.intlev.levd <- NA ### Difference between pre and end level
results$is.intlev.levdprop <- NA ### Proportion of drop

########################################
################### Loop over different series
for (i in 1:(vars.list)) {
  #print(names.rel[i])
  y <- data.pick[names.rel[i]]
  results$name[i] <- names.rel[i]
  islstr.res <- result.list[[i]]

  ### Number of trend breaks
  nbreak <- NROW(grep("tis", islstr.res$ISnames))
  results$is.nbreak[i] <- nbreak # number of breaks

  ### coefficient path
  tis.path <- trend.var(islstr.res)

  #############################################
  ##### Measure 1: Timing of Breaks
  ################################################
  if (nbreak > 0){ ## if there are any relevant breaks

    # trend break names:
    tnames <- islstr.res$ISnames[grep("tis", islstr.res$ISnames)]
    if (NCOL(islstr.res$aux$mX[,tnames]) > 1){
      tdates <- apply(islstr.res$aux$mX[,tnames], 2, function(x) (which(x>0))[1]) ## finds first non-zero index
    } else {
      tdates <- min(which(islstr.res$aux$mX[,tnames] > 0))
    }

    ### coefficients and fitted values
    rel.coef.num <- islstr.res$specific.spec[tnames]
    rel.coef <- islstr.res$coefficients[rel.coef.num]
    mconst.res <- islstr.res$coefficients[islstr.res$specific.spec["mconst"]]
    fit.res <- fitted(islstr.res) ## fitted values
    fit.res <- fit.res[!fit.res<0]

    #### Measure 1.1: the first break where the coefficient path slopes in the chosen direction
    if (arguments[5] == 'both'){
      direction <- 'min(which(tis.path$indic.fit$coef != 0))'
    } else if (arguments[5] == 'up'){
      direction <- 'min(which(tis.path$indic.fit$coef > 0))'
    } else if (arguments[5] == 'down'){
      direction <- 'min(which(tis.path$indic.fit$coef < 0))'
    }
    is.first <- eval(parse(text=direction))
    results$is.tfirst[i] <- is.first

    ### Measure 1.2: first break (in the chosen direction) after the known break-date intervention
    if (arguments[5] == 'both'){
      direction <- 'min( tdates[which(tis.path$indic.fit$coef[tdates] != 0)][tdates[which(tis.path$indic.fit$coef[tdates] != 0)] > known.t] )'
    } else if (arguments[5] == 'up'){
      direction <- 'min( tdates[which(tis.path$indic.fit$coef[tdates] > 0)][tdates[which(tis.path$indic.fit$coef[tdates] > 0)] > known.t] )'
    } else if (arguments[5] == 'down'){
      direction <- 'min( tdates[which(tis.path$indic.fit$coef[tdates] < 0)][tdates[which(tis.path$indic.fit$coef[tdates] < 0)] > known.t] )'
    }
    is.first.pknown <- eval(parse(text=direction))
    results$is.tfirst.pknown[i] <- is.first.pknown
    #### Measure 1.3: the first break where there is no subsequent offset of at least break.t.lim
    offset <- array(NA, dim=NROW(tdates))
    levels <- array(NA, dim=NROW(tdates))

    for (j in 1:NROW(tdates)){ ### for each break, compute the total change
      date <- tdates[j]
      if (j < NROW(tdates)){
        enddate <- tdates[j+1]
      } else {
        enddate <- NROW(tis.path$indic.fit$indic.fit)
      }
      startlev <- tis.path$indic.fit$indic.fit[date-1]
      endlev <- tis.path$indic.fit$indic.fit[enddate-1]
      levchange <- endlev - startlev
      levels[j] <- levchange
    }

    ratios <- array(NA, dim=(NROW(levels)-1))
    if (NROW(levels) > 1){
      for (j in 1:(NROW(levels)-1)){
        ratios[j] <- levels[j+1]/levels[j]
        if (ratios[j] < -break.t.lim & !is.na(ratios[j])){
          offset[j] <- TRUE
        } else {
          offset[j] <- FALSE
        }
        offset[NROW(levels)] <- FALSE # the last break can have no subsequent offset
      }
    } else {
      offset <- FALSE
    }

    ############ FUTURE - ADD IN OFFSETS *BEFORE* BREAK TOO ###############

    ### Store first break which is not offset and which occurs after the known break date
    if (arguments[5] == 'both'){
      direction <- 'min(tdates[rel.coef != 0 & tdates >= known.t & tis.path$indic.fit$coef[tdates] != 0 & offset == FALSE])'
    } else if (arguments[5] == 'up'){
      direction <- 'min(tdates[rel.coef > 0 & tdates >= known.t & tis.path$indic.fit$coef[tdates] > 0 & offset == FALSE])'
    } else if (arguments[5] == 'down'){
      direction <- 'min(tdates[rel.coef < 0 & tdates >= known.t & tis.path$indic.fit$coef[tdates] < 0 & offset == FALSE])'
    }
    is.first.pknown.offs <- eval(parse(text=direction))
    results$is.tfirst.pknown.offs[i] <- is.first.pknown.offs

    ### Store first break which is not offset (regardless of known break date)
    if (arguments[5] == 'both'){
      direction <- 'min(tdates[rel.coef != 0 & tis.path$indic.fit$coef[tdates] != 0 & offset == FALSE])'
    } else if (arguments[5] == 'up'){
      direction <- 'min(tdates[rel.coef > 0 & tis.path$indic.fit$coef[tdates] > 0 & offset == FALSE])'
    } else if (arguments[5] == 'down'){
      direction <- 'min(tdates[rel.coef < 0 & tis.path$indic.fit$coef[tdates] < 0 & offset == FALSE])'
    }
    is.first.offs <- eval(parse(text=direction))
    results$is.tfirst.offs[i] <- is.first.offs

    #############################################
    ##### Measure 2 Steepness/Slope: average slope of the steepest contiguous segment contributing at least XX% of the total level change
    ################################################
    #print(!is.first==Inf)
    if (!is.first==Inf) # only if a first break was found
    {
      coefp.dif <- tis.path$indic.fit$coef
      const.path <- tis.path$indic.fit$indic.fit
      first.index <- which(tdates==is.first.pknown)
      interval <- const.path[tdates[first.index:length(tdates)]-1]

      #predrop <- fit.res[is.first.pknown-1] # changed: FP Sept 13th.
      predrop <- fit.res[is.first.pknown]

      #totaldif <- sum(coefp.dif[(is.first.pknown-1):(NROW(coefp.dif))]) # changed: FP Sept 13th.
      totaldif <- sum(coefp.dif[(is.first.pknown):(NROW(coefp.dif))]) # total drop, change in every period, i.e. the slope
      max_interval <- NROW(const.path) - is.first.pknown + 1
      grid_sum <- matrix(NA, ncol=max_interval, nrow=max_interval)
      grid_mean <- matrix(NA, ncol=max_interval, nrow=max_interval)

      ##### Grid Search:
      for (j in 1:max_interval){
        grid_sum[,j] <- runmean(coefp.dif[(is.first.pknown):NROW(coefp.dif)], j, align="left", endrule="NA")*j # sum over every length (columns) at every point (rows)
        grid_mean[,j] <- runmean(coefp.dif[(is.first.pknown):NROW(coefp.dif)], j, align="left", endrule="NA") # the running mean of the slope, corresponding to the values above
      }

      grid_prop <- grid_sum*(as.numeric(totaldif))^(-1)
      maxc <- apply(grid_prop, 2, max, na.rm=TRUE)
      min_index <- min(which(maxc>slope.lim))

      # Find the steepest slope that falls within this shortest interval and satisfies the XX% requirement:
      minslopgrid <- which(grid_prop[,min_index] > slope.lim)
      slopeval <- grid_mean[minslopgrid[which.max(abs(grid_mean[minslopgrid, min_index]))], min_index] ### the maximum slope, on the shortest interval, that yields over XX% drop

      interval.full <- const.path[c(tdates[first.index:length(tdates)]-1, NROW(const.path))]

      if (length(tdates[first.index:length(tdates)])>1){ # if more than one break
        slopindex <- minslopgrid[which.max(abs(grid_mean[minslopgrid, min_index]))]
      } else { # if just one break
        slopindex <- 1 # start at the beginning
      }

      coef.p <- const.path
      coefp.dif.hl <- coefp.dif*NA
      coefp.dif.hl[(is.first.pknown+slopindex-2):((is.first.pknown+slopindex)+min_index-3)] <- coefp.dif[(is.first.pknown+slopindex-2):((is.first.pknown+slopindex)+min_index-3)]

      ### Store the part of the slope segment evaluated, for plotting
      coef.p.hl <- coef.p*NA
      coef.p.hl[(is.first.pknown+slopindex-2):((is.first.pknown+slopindex)+min_index-2)] <- const.path[(is.first.pknown+slopindex-2):((is.first.pknown+slopindex)+min_index-2)]
      result.list[[i]]$is.results$coef.p.hl <- coef.p.hl

      big.break.index <- which(round(tis.path$coef.var$coef, digits = 4)==round(slopeval, digits = 4))

      ### Store Slope Results
      results$is.slope.ma[i] <- slopeval # slope over the contiguous segment
      results$is.slope.ma.prop[i] <- slopeval/predrop # slope over the contiguous segment as proportion of prior level
      results$is.slope.ma.prop.lev[i] <- grid_prop[slopindex, min_index] # percentage of total drop that the contiguous segment contributes

      ### Biggest break
      big.break <- is.first.pknown+slopindex-1
      results$is.tfirst.big[i] <- big.break
    }

    #############################################
    ##### Measure 3: Magnitude of Change
    ################################################
    start.lev <- is.first.pknown-1
    init.lev <- fit.res[start.lev]
    end.lev <- fit.res[NROW(fit.res)]

    ### Store Magnitude Results
    results$is.intlev.initlev[i] <- init.lev
    results$is.intlev.finallev[i] <- end.lev
    results$is.intlev.levd[i] <- as.numeric(init.lev) - as.numeric(end.lev) # absolute change
    results$is.intlev.levdprop[i] <- (as.numeric(init.lev) - as.numeric(end.lev))/as.numeric(init.lev) # proportional change

    print(paste(round((i / vars)*100, 1), "%"))
  } ## if there are breaks closed

  #### Save analysis plots
  if (saveplots_analysis){
    filename <- paste(fig_path_tis_analysis, results$name[i], ".png", sep="")
    wid <- 500
    hei <- 500
    png(filename, width=wid, height=hei) # was png() + dev.copy() + two dev.off() calls
    par(mfrow=c(1,1))
    islstr.res$aux$y[islstr.res$aux$y == -1] <- NA # mask the arbitrary fill value used for missing observations (was 99, but change_detection.R fills with -1)
    plot(islstr.res$aux$y, col="black", ylab="Numerator over denominator", xlab="Time series months", type="l", ylim=c(0, 1), las=1)
    trendline <- tis.path$indic.fit$indic.fit+islstr.res$coefficients[islstr.res$specific.spec["mconst"]]
    lines(trendline, col="red", lwd=2) ### fitted lines
    if (nbreak > 0){
      if (!is.first==Inf){
        abline(h=fit.res[is.first.pknown-1], lty=3, col="purple", lwd=2) ### start value
        abline(h=fit.res[NROW(fit.res)], lty=3, col="purple", lwd=2) ### end value
        lines(coef.p.hl+mconst.res, col=rgb(red=1, green=0.4118, blue=0, alpha=0.5), lwd=15) ### section used to evaluate slope
        #print(big.break.index)
        if (length(big.break.index) != 0){
          abline(v=tdates[min(big.break.index)], lty=2, col="blue", lwd=2) ## first break after intervention which is not offset
        }
      }
    }
    #abline(v=known.t, lty=1, col="blue", lwd=2) ### known intervention, blue dotted
    #warnings()
    #print(names.rel[i])
    dev.off()
  }
} # loop over i closed

write.csv(results, file = arguments[3])
print("Done extracting results")

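A minimal sketch of inspecting the combined output written by concatenate_split_dfs(). The path is a hypothetical working directory; the column names come from the results data frame defined above:

# A minimal sketch; 'data/my_query' is a hypothetical working directory.
import pandas as pd

results = pd.read_csv('data/my_query/r_output.csv', index_col='name')

# Keep series where at least one trend break was detected, then rank by
# the proportional level change (is.intlev.levdprop).
changed = results[results['is.nbreak'] > 0]
print(changed.sort_values('is.intlev.levdprop', ascending=False).head())
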
change_detection/trend_isat_functions.R

#####################
#### extract trend coefficients and compute coefficient path
########################
#x <- islstr.res # for testing only

trend.var <- function(x, mxfull=NULL, mxbreak="tis"){

  if (length(grep(mxbreak, x$ISnames)) != 0){

    if (is.null(mxfull)){

      relvar <- grep(mxbreak, names(coefficients(x)))
      rel.coefs <- coefficients(x)[relvar]
      end.coef <- sum(rel.coefs)
      rel.variance <- x$vcov.mean[relvar, relvar]

      coef.var <- data.frame(matrix(NA, length(rel.coefs), 3))
      names(coef.var) <- c("tis", "coef", "se")
      coef.var$tis <- names(coefficients(x)[relvar])
      coef.var$coef <- cumsum(rel.coefs)

      ## find the timing:
      rel.mx <- grep(mxbreak, x$ISnames)
      if (NCOL(x$aux$mX[,x$ISnames]) > 1){
        indic.mat <- x$aux$mX[,x$ISnames][,rel.mx]
      } else {
        indic.mat <- x$aux$mX[,x$ISnames]
      }

      if (!is.null(dim(indic.mat))){
        timing <- apply(indic.mat[,], 2, function(x) min(which(x!=0)))
      } else {
        timing <- min(which(indic.mat!=0))
      }

      for (i in 1:length(relvar)){
        if (length(relvar) > 1){
          cuvar <- sum(rel.variance[1:i, 1:i])
        } else {
          cuvar <- rel.variance[1]
        }
        coef.var$se[i] <- sqrt(cuvar)
      }

      coef.var$t.val <- coef.var$coef/coef.var$se
      coef.var$p.val <- (1-pnorm(abs(coef.var$t.val)))*2
      coef.var$time <- timing

      ### get a coefficient path out
      if (length(rel.coefs) > 1){
        indic.fit <- data.frame(indic.mat %*% rel.coefs)
        names(indic.fit) <- c("indic.fit")
      } else {
        indic.fit <- data.frame(indic.mat * rel.coefs)
        names(indic.fit) <- c("indic.fit")
      }

      indic.fit$coef <- 0
      indic.fit$se <- 0
      indic.fit$t.val <- 0
      indic.fit$p.val <- 0

      for (j in 1:length(timing)){
        start <- timing[j]
        if (j < length(timing)){
          end <- timing[j+1]-1
        } else {
          end <- NROW(indic.fit)
        }
        indic.fit$coef[start:end] <- coef.var$coef[j]
        indic.fit$se[start:end] <- coef.var$se[j]
        indic.fit$t.val[start:end] <- coef.var$t.val[j]
        indic.fit$p.val[start:end] <- coef.var$p.val[j]
      }
    } # if null check closed
  } else { # grep check closed
    coef.var <- NULL
    indic.fit <- data.frame(matrix(0, nrow=NROW(x$aux$y), ncol=1))
    names(indic.fit) <- c("indic.fit")
    indic.fit$coef <- 0
    indic.fit$se <- 0
    indic.fit$t.val <- 0
    indic.fit$p.val <- 0
  } # grep check closed

  out <- list(coef.var, indic.fit)
  names(out) <- c("coef.var", "indic.fit")
  return(out)
} ### function closed
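The timing measures (is.tfirst and friends) are 1-based observation indices into each series. A minimal sketch, with hypothetical paths, of mapping such an index back to a calendar month using the unix-second 'month' column that shape_dataframe() writes into the r_input_*.csv files:

# A minimal sketch (hypothetical paths) mapping a 1-based break index
# from r_output.csv back to a calendar month.
import pandas as pd

months = pd.read_csv('data/my_query/r_input_0.csv')['month']
months = pd.to_datetime(months.sort_values().unique(), unit='s')

t_first = 18  # e.g. results['is.tfirst'] for one series (1-based, from R)
print(months[t_first - 1])  # calendar month of the first detected break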