# list mean: the sum of the elements divided by the number of elements.
# NOTE: a, b, c, d are placeholders for numeric values; nothing in this
# snippet defines them.
# NOTE: the name `list` shadows the built-in type; it is kept because the
# histogram snippet further down in this file reads the same name.
list = [a, b, c, d]
list_mean = sum(list) / len(list)
# line plot: draw list_2 (y values) against list_1 (x values)
import matplotlib.pyplot as plt  # imported here so the snippet runs on its own

# NOTE(review): list_1 and list_2 must be equal-length sequences that are
# already defined when this line runs -- confirm against the data in use.
plt.plot(list_1, list_2)
plt.show()  # render now so the line does not end up on the next figure
# mean (ordinal scales): replace every answer by its rank on the scale,
# then average the ranks.
list_1 = ["none", "some", "a lot", "none", "a few", "none", "none"]
# The scale itself; a label's position in this list is its rank (0-based).
list_2 = ["none", "a few", "some", "a lot"]
# Look up the rank of each answer in list_1.
numbers = [*map(list_2.index, list_1)]
mean = sum(numbers) / len(numbers)
# mean (categorical scales): average the numeric values that belong to one
# category.
# Category labels are strings, matching the comparison against 'A' below.
cat = ['A', 'A', 'A', 'B', 'A']
num = [1, 1, 2, 4, 9]
# Keep only the values whose category (same position in `cat`) is 'A'.
num_a = [value for label, value in zip(cat, num) if label == 'A']
# Mean of category 'A'; None when no value belongs to that category.
mean_a = sum(num_a) / len(num_a) if num_a else None
# histogram (frequency): count how many values fall into each of 100 bins
import matplotlib.pyplot as plt  # imported here so the snippet runs on its own

# `list` is the data list built in the "list mean" snippet at the top of
# this file (it shadows the built-in of the same name).
plt.hist(list, bins=100)
plt.show()
# Skew describes how asymmetric the data is:
#   negative skew -> data concentrated on the right
#   positive skew -> data concentrated on the left
from scipy.stats import skew

# test_scores_positive is expected to be defined before this snippet runs.
positive_skew = skew(test_scores_positive)
# kurtosis: describes the shape of the distribution's peak.
import matplotlib.pyplot as plt  # needed by the plotting calls below
from scipy.stats import kurtosis

# test_scores_platy is expected to be defined before this snippet runs.
plt.hist(test_scores_platy)
# Pin both axis ranges instead of letting matplotlib auto-scale them.
plt.ylim(0, 3500)
plt.xlim(0, 1)
plt.show()

# scipy's kurtosis() uses Fisher's definition by default, i.e. a normal
# distribution scores 0.0.
kurt_platy = kurtosis(test_scores_platy)
# median and mean: mark both on the histogram with plt.axvline()
import matplotlib.pyplot as plt
import numpy as np  # same alias as the other snippets in this file

# NOTE(review): test_scores_positive must expose .mean() (e.g. a NumPy
# array or pandas Series) -- confirm against where it is defined.
plt.hist(test_scores_positive)
median_p = np.median(test_scores_positive)
plt.axvline(median_p, color="g")  # green vertical line: median
plt.axvline(test_scores_positive.mean(), color="r")  # red vertical line: mean
plt.show()
# remove NaN rows: DataFrame.dropna() and DataFrame.dropna(subset=[...])
import pandas  # imported here so the snippet runs on its own

# NOTE: `f` is a placeholder for the CSV path or file object; nothing in
# this snippet defines it.
titanic_survival = pandas.read_csv(f)
# Variant 1: drop every row that has a missing value in any column.
new_titanic_survival = titanic_survival.dropna()
# Variant 2: drop only the rows missing "age" or "sex". This assignment
# replaces the result of variant 1.
new_titanic_survival = titanic_survival.dropna(subset=["age", "sex"])
# Histogram of the "age" column with its median (green) and mean (red)
# drawn as vertical lines.
import matplotlib.pyplot as plt
import numpy as np

plt.hist(new_titanic_survival["age"])
median = np.median(new_titanic_survival["age"])
plt.axvline(x=median, color="g")
plt.axvline(x=new_titanic_survival["age"].mean(), color="r")
plt.show()
# Figure: 12.png (output of the age histogram above)
# Summary statistics of the "age" column: mean, median, skew and kurtosis.
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import kurtosis, skew

mean_age = new_titanic_survival["age"].mean()
median_age = np.median(new_titanic_survival["age"])
skew_age = skew(new_titanic_survival["age"])
kurtosis_age = kurtosis(new_titanic_survival["age"])
# Reader comments (web-page footer text; not part of the code)