-
Notifications
You must be signed in to change notification settings - Fork 3
/
Analysis Tools Week 3.py
97 lines (57 loc) · 3.37 KB
/
Analysis Tools Week 3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 7 15:00:39 2019
@author: Voltas
"""
import matplotlib.pyplot as plt
import numpy
import pandas
import scipy
import scipy.stats
import seaborn
# Read the NESARC data file (path is relative to the working directory).
nesarc = pandas.read_csv('nesarc_pds.csv', low_memory=False)

# Display settings: do not truncate columns or rows when printing a DataFrame.
for display_option in ('display.max_columns', 'display.max_rows'):
    pandas.set_option(display_option, None)

# Upper-case every column name so variables can be addressed uniformly.
nesarc.columns = [column_name.upper() for column_name in nesarc.columns]

# Print floats with the plain '%f' format.
pandas.set_option('display.float_format', lambda value: '%f' % value)

# Convert the analysis variables to numeric; entries that cannot be parsed
# become NaN because of errors='coerce'.
for variable in ('AGE', 'S3BQ4', 'S4AQ6A', 'S3BD5Q2F', 'S9Q6A', 'S4AQ7', 'S3BQ1A5'):
    nesarc[variable] = pandas.to_numeric(nesarc[variable], errors='coerce')
# Keep only the cannabis users (S3BQ1A5 == 1), then take an independent copy
# so the recoding below does not write into a slice of the full data set.
is_cannabis_user = nesarc['S3BQ1A5'] == 1
subset1 = nesarc[is_cannabis_user]
subsetc1 = subset1.copy()

# Recode the missing-data markers as NaN on the copy.
subsetc1['S3BQ1A5'] = subsetc1['S3BQ1A5'].replace(9, numpy.nan)
for age_variable in ('S3BD5Q2F', 'S4AQ6A', 'S9Q6A'):
    # Each age variable gets the same two-step recode: 'BL' first, then 99.
    blanks_removed = subsetc1[age_variable].replace('BL', numpy.nan)
    subsetc1[age_variable] = blanks_removed.replace(99, numpy.nan)
# Scatterplot for the age when began using cannabis the most and the age of
# the first episode of major depression.
# The cleaned copy (subsetc1) is used instead of subset1: only the copy has
# the 99 codes recoded to NaN, so plotting subset1 would draw and fit those
# codes as if they were real ages.
plt.figure(figsize=(12, 4))  # Change plot size
scat1 = seaborn.regplot(x="S3BD5Q2F", y="S4AQ6A", fit_reg=True, data=subsetc1)
plt.xlabel('Age when began using cannabis the most')
plt.ylabel('Age when experienced the first episode of major depression')
plt.title('Scatterplot for the age when began using cannabis the most and the age of the first episode of major depression')
plt.show()

# Pearson correlation coefficient for the age when began using cannabis the
# most and the age of the first episode of major depression.
# Only the two columns being correlated are required to be non-missing; a
# DataFrame-wide dropna() would also discard respondents because of NaN in
# columns that play no part in this correlation.
data_clean = subsetc1[['S3BD5Q2F', 'S4AQ6A']].dropna()
print('Association between the age when began using cannabis the most and the age of the first episode of major depression')
print(scipy.stats.pearsonr(data_clean['S3BD5Q2F'], data_clean['S4AQ6A']))

# Scatterplot for the age when began using cannabis the most and the age of
# the first episode of general anxiety (again from the cleaned copy).
plt.figure(figsize=(12, 4))  # Change plot size
scat2 = seaborn.regplot(x="S3BD5Q2F", y="S9Q6A", fit_reg=True, data=subsetc1)
plt.xlabel('Age when began using cannabis the most')
plt.ylabel('Age when experienced the first episode of general anxiety')
plt.title('Scatterplot for the age when began using cannabis the most and the age of the first episode of general anxiety')
plt.show()

# Pearson correlation coefficient for the age when began using cannabis the
# most and the age of the first episode of general anxiety, computed on the
# rows where both of these two ages are present.
data_clean_anxiety = subsetc1[['S3BD5Q2F', 'S9Q6A']].dropna()
print('Association between the age when began using cannabis the most and the age of the first episode of general anxiety')
print(scipy.stats.pearsonr(data_clean_anxiety['S3BD5Q2F'], data_clean_anxiety['S9Q6A']))