-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_collisionDensity(SWITRS).R
106 lines (98 loc) · 4.56 KB
/
generate_collisionDensity(SWITRS).R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# ---- Environment ------------------------------------------------------------
# Every relative path used below is resolved against this project folder.
setwd("//ahmct-065/teams/PMRF/Amir")
library(data.table)
library(dplyr)
library(tidyr)

# ---- Collision records (CHP) ------------------------------------------------
CHP.df <- fread(file = "./bin/Final Datasets/CHP.csv", sep = ",", header = TRUE)

# Empty strings are treated as missing values throughout the table.
CHP.df[CHP.df == ""] <- NA

# Records without a side-of-highway value default to "R".
# (Earlier alternative, kept for reference: fall back to DIRECTION instead.)
#CHP.df$SIDE_OF_HWY[which(is.na(CHP.df$SIDE_OF_HWY))]=CHP.df$DIRECTION[which(is.na(CHP.df$SIDE_OF_HWY))]
CHP.df$SIDE_OF_HWY[is.na(CHP.df$SIDE_OF_HWY)] <- "R"

# The odometer must be numeric for the interval look-ups further down.
CHP.df$Odometer <- as.numeric(CHP.df$Odometer)

#LEMO.df=fread(file = "./bin/Final Datasets/LEMO_WorkOrder+odom.csv", sep=",", header=TRUE)

# ---- Route segments ----------------------------------------------------------
# Columns read later in this script: route, county_code, start, end.
RouteMarker.df <- fread(file = "./RouteMarker.odometer.csv", sep = ",", header = TRUE)

#RouteMarker_bins.df=setDT(RouteMarker.df)[order(route, start), .(segments=seq.maker(start, end)), by=.(route, county_code, start, end)]
#fwrite(RouteMarker_bins.df, file = "RouteMarker_bins.csv", sep=",", append=FALSE)
# Build the break points that cut the interval [start, end] into bins.
#
# Breaks are placed every `by` units starting at `start`; the final break is
# always exactly `end`, so the last bin may be shorter than `by`.
#
# Args:
#   start: numeric scalar, lower bound of the interval.
#   end:   numeric scalar, upper bound of the interval (must be >= start).
#   by:    positive numeric scalar, bin width. Defaults to 2.
#
# Returns:
#   Increasing numeric vector of break points beginning at `start` and ending
#   at `end` (a single value when start == end).
seq.maker <- function(start, end, by = 2) {
  stopifnot(is.numeric(by), length(by) == 1L, by > 0)
  breaks <- seq(start, end, by)
  last <- length(breaks)
  # Decide whether the regular grid already reaches `end` with a tolerance
  # instead of an exact `%%` test: for fractional odometers (e.g. 0.1 to 4.1)
  # the remainder is not exactly 0, which used to append `end` a second time
  # and create a zero-width final bin.
  if (isTRUE(all.equal(breaks[last], end))) {
    # Snap to the exact upper bound so a value equal to `end` falls inside
    # the last bin.
    breaks[last] <- end
    breaks
  } else {
    c(breaks, end)
  }
}
# ---- Assign every CHP collision to an odometer bin ---------------------------
#
# For each collision with a county, route and odometer value:
#   1. find the RouteMarker segments of that county/route,
#   2. pick the segment whose start is the closest one at or below the
#      odometer (the first segment if the odometer lies before all of them),
#   3. cut that segment into 2-unit bins with seq.maker() and record the bin.
#
# Results:
#   bin.df     - one row per located collision: CASE_ID, ROUTE_COUNTY_ID (row
#                number of the segment in RouteMarker.df), BIN_ID, bin_start,
#                bin_end, route, county. Written to ./bin/collision_segments.csv.
#   freq_table - number of distinct CASE_IDs per (route, county, bin).
#                Written to ./bin/collision_density.csv.
#
# Collisions are skipped when county, route or odometer is missing, or when no
# usable segment exists for their county/route.

# Locate one odometer reading inside a single segment.
#
# Args:
#   odom:      numeric scalar, odometer of the collision.
#   seg_start: numeric scalar, segment start.
#   seg_end:   numeric scalar, segment end.
#
# Returns:
#   list(id, start, end): 1-based bin index and the bounds of that bin.
locate_bin <- function(odom, seg_start, seg_end) {
  # A segment no longer than one bin width is a single bin spanning it.
  if (seg_end - seg_start <= 2) {
    return(list(id = 1L, start = seg_start, end = seg_end))
  }
  # seq.maker() ends the breaks at seg_end, so the trailing partial bin has a
  # real upper bound. unique() guards against a repeated final break.
  breaks <- unique(seq.maker(seg_start, seg_end))
  id <- findInterval(odom, breaks, rightmost.closed = TRUE)
  # Readings outside the segment are counted in the nearest bin: before the
  # start -> first bin, past the end -> last bin.
  id <- min(max(id, 1L), length(breaks) - 1L)
  list(id = id, start = breaks[id], end = breaks[id + 1L])
}

# Pull the columns out once; indexing plain vectors inside the loop is much
# cheaper than subsetting the tables on every iteration.
chp_county <- CHP.df$CALTRANS_COUNTY
chp_route <- CHP.df$STATE_ROUTE
chp_odom <- CHP.df$Odometer
marker_county <- RouteMarker.df$county_code
marker_route <- RouteMarker.df$route
marker_start <- as.numeric(RouteMarker.df$start)
marker_end <- as.numeric(RouteMarker.df$end)
marker_usable <- !is.na(marker_start) & !is.na(marker_end)

# Preallocate the result columns (NA = collision not located) instead of
# growing a data frame with rbind() on every iteration.
n_cases <- nrow(CHP.df)
out_segment <- rep(NA_integer_, n_cases)
out_bin_id <- rep(NA_integer_, n_cases)
out_bin_start <- rep(NA_real_, n_cases)
out_bin_end <- rep(NA_real_, n_cases)

for (i in seq_len(n_cases)) {
  # Progress indicator for long runs.
  if (i %% 100000 == 0) {
    print(i)
  }

  county <- chp_county[i]
  route_id <- chp_route[i]
  odom <- chp_odom[i]
  if (is.na(county) || is.na(route_id) || is.na(odom)) {
    next
  }

  seg_rows <- which(marker_county == county & marker_route == route_id &
                      marker_usable)
  if (length(seg_rows) == 0L) {
    # No segment known for this county/route: nothing to bin against.
    next
  }
  # findInterval() requires its break vector to be sorted.
  seg_rows <- seg_rows[order(marker_start[seg_rows])]

  # Segment k covers [start_k, start_k+1). rightmost.closed is deliberately
  # not used here: these are segment starts, not bin edges, and closing the
  # last interval would send a reading equal to the last start to the
  # previous segment.
  ind <- findInterval(odom, marker_start[seg_rows])
  if (ind == 0L) {
    # Odometer lies before the first listed segment of the route; count it
    # as belonging to the first segment.
    ind <- 1L
  }
  seg_row <- seg_rows[ind]

  bin <- locate_bin(odom, marker_start[seg_row], marker_end[seg_row])
  out_segment[i] <- seg_row
  out_bin_id[i] <- bin$id
  out_bin_start[i] <- bin$start
  out_bin_end[i] <- bin$end
}

located <- !is.na(out_segment)
bin.df <- data.table(
  CASE_ID = CHP.df$CASE_ID[located],
  ROUTE_COUNTY_ID = out_segment[located],
  BIN_ID = out_bin_id[located],
  bin_start = out_bin_start[located],
  bin_end = out_bin_end[located],
  # ROUTE_COUNTY_ID is a row number of RouteMarker.df, so index directly.
  route = marker_route[out_segment[located]],
  county = marker_county[out_segment[located]]
)

# Written once, replacing any previous file: appending the growing table at
# every checkpoint duplicated rows, and the final append added two extra
# columns to the same file.
fwrite(bin.df, file = "./bin/collision_segments.csv", sep = ",", append = FALSE)

freq_table <- bin.df[
  order(route, county, bin_start),
  .(freq = length(unique(CASE_ID))),
  by = .(route, county, bin_start, bin_end)
]
fwrite(freq_table, file = "./bin/collision_density.csv", sep = ",", append = FALSE)