-
Notifications
You must be signed in to change notification settings - Fork 0
/
categorized_crime_shannon.py
109 lines (82 loc) · 2.96 KB
/
categorized_crime_shannon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 20:56:28 2018
@author: Samira
"""
#!/usr/bin/env python
# Shannon Diversity Index
# http://en.wikipedia.org/wiki/Shannon_index
import sys
from collections import defaultdict
def sdi(data):
    """Return the Shannon diversity index of a species-count mapping.

    The index is ``-sum(p * ln(p))`` over every species with a non-zero
    count, where ``p`` is that species' relative abundance
    (``count / total of all counts``).

    Args:
        data: Mapping ``{'species': count}``.

    Returns:
        The index as a float; ``0.0`` when there is nothing to measure
        (empty mapping, or every count is zero).

    >>> sdi({'a': 10, 'b': 20, 'c': 30,})
    1.0114042647073518
    """
    from math import log as ln

    # Compare by value, not identity: ``n is 0`` is only true for CPython's
    # cached small int, so a float or numpy zero used to slip through and
    # blow up in ln(0).
    counts = [n for n in data.values() if n != 0]
    if not counts:
        return 0.0

    # Zero counts contribute nothing, so this equals the sum of all values.
    total = sum(counts)

    def p(n, N):
        """ Relative abundance term p * ln(p) for one species """
        share = float(n) / N
        return share * ln(share)

    return -sum(p(n, total) for n in counts)
def simpson_di(data):
    """Return the Simpson diversity index of a species-count mapping.

    The index is ``sum(p ** 2)`` over every species with a non-zero count,
    where ``p`` is that species' relative abundance
    (``count / total of all counts``).

    Args:
        data: Mapping ``{'species': count}``.

    Returns:
        The index as a float; ``0.0`` when there is nothing to measure
        (empty mapping, or every count is zero).

    >>> simpson_di({'a': 10, 'b': 20, 'c': 30,})
    0.3888888888888889
    """
    # Compare by value, not identity: ``n is 0`` is only true for CPython's
    # cached small int, so float or numpy zeros were not recognised and an
    # all-zero input ended in a division by zero.
    counts = [n for n in data.values() if n != 0]
    if not counts:
        return 0.0

    # Zero counts contribute nothing, so this equals the sum of all values.
    total = sum(counts)

    def p(n, N):
        """ Relative abundance """
        return float(n) / N

    return sum(p(n, total) ** 2 for n in counts)
def inverse_simpson_di(data):
    """Return the reciprocal of the Simpson diversity index.

    Args:
        data: Mapping ``{'species': count}``, passed unchanged to
            ``simpson_di``.

    Returns:
        ``1 / simpson_di(data)`` as a float.

    Raises:
        ZeroDivisionError: if ``simpson_di(data)`` evaluates to zero.

    >>> inverse_simpson_di({'a': 10, 'b': 20, 'c': 30,})
    2.571428571428571
    """
    simpson = simpson_di(data)
    return 1.0 / simpson
if __name__ == '__main__':
    # Script entry point: for every census id in the grouped crime CSV,
    # collect the per-category counts, compute their Shannon diversity index
    # with sdi() and pickle the resulting list of records.
    import pickle

    import pandas as pd

    # Crime-category table; rows with Violence > 0 give the target
    # category ids in 'Row#'.
    crim_cats = pd.read_csv("la_crime_data/New_data/Crime_Categories_LA.csv", sep=',')
    category_table = crim_cats.loc[:, ['Row#', 'Violence', 'P1Prop', 'P2P']]
    wanted_category = "violence"
    data_violence = category_table.loc[category_table['Violence'] > 0]
    target_cats = data_violence["Row#"]
    # NOTE(review): target_cats is computed but never applied -- the check
    # "my_cat[i] in target_cats" was already commented out, so every crime
    # category is counted even though the output file is named "violent".
    # Confirm whether the filter should be re-enabled.

    df = pd.read_csv("la_crime_data/lat_lon_census_grouped.csv", sep=',')

    # Map each distinct census id to itself so a row finds its group with a
    # single dict lookup instead of scanning every id for every row. Looking
    # up through this map keeps the same key objects the scan used.
    unique_osmids = df['census'].unique()
    census_ids = {group: group for group in unique_osmids}

    dic = defaultdict(list)
    for _, row in df.iterrows():
        census = row["census"]
        # A missing census id never compared equal to any group in the
        # original equality scan, so such rows stay excluded.
        if pd.isnull(census):
            continue
        mygroup = census_ids.get(census, census)
        # CRIMECLASSCODE may hold several whitespace-separated codes; each
        # one is recorded with the row's full count.
        for code in row["CRIMECLASSCODE"].split():
            dic[mygroup].append({code: row["count"]})

    xyz = []
    print("Line 92")
    # .items() works on Python 2 and 3 (.iteritems() is Python 2 only).
    for entry, pairs in dic.items():
        result = {}
        # NOTE(review): update() overwrites, so if a code occurs more than
        # once for a census id only the last count is kept -- confirm that
        # counts should not be summed instead.
        for d in pairs:
            result.update(d)
        xyz.append({"census": entry, "data": result})

    xyz_shannon = [
        {
            "census": entry["census"],
            "data": entry["data"],
            "shannon_index": sdi(entry["data"]),
        }
        for entry in xyz
    ]
    print("here")

    with open('la_crime_data/census_violent_shannon.dat', 'wb') as outfile:
        pickle.dump(xyz_shannon, outfile)

    # NOTE(review): this reads 'shannon.dat', not the file written above --
    # confirm the intended path. Pickle data is binary, so the file must be
    # opened with 'rb'; only unpickle files this pipeline produced itself.
    with open('shannon.dat', 'rb') as f:
        x = pickle.load(f)