#Import Statements
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
# Load the two cleaned datasets from data/final. The first CSV column of each
# file is used as the row index of the resulting frame.
archive = pd.read_csv(
    'data/final/twitter_archive_master.csv',
    index_col=0,
)
img = pd.read_csv(
    'data/final/image_predictions.csv',
    index_col=0,
)
print("Import Successful")
# fav_count from archive
# Summary statistics of the favourite counts over all archived tweets.
fav_mean = archive["fav_count"].mean()
fav_median = archive["fav_count"].median()
fav_max = archive["fav_count"].max()
fav_sum = archive["fav_count"].sum()
# Bare expression: the number of non-null favourite counts; the result is
# not stored anywhere.
archive["fav_count"].count()
print("Mean Favourite Value is : {}".format(fav_mean))
print("Median Favourite Value is : {}".format(fav_median))
print("Max Favourite Value for an tweet is : {}".format(fav_max))
print("Total Favourite secured for All Tweets : {}".format(fav_sum))
# Histogram of the favourite counts; the axes returned by hist() are the
# ones being labelled, and the figure is written to Docs/Viz.
fav_plt = archive["fav_count"].hist(alpha=0.8, figsize=(8, 8))
fav_plt.set_xlabel("Fav Counts")
fav_plt.set_ylabel("Count of Tweets")
fav_plt.set_title("Favorited Tweets")
plt.savefig('Docs/Viz/1.png')
# retweet_count from archive
# Bare expression: shows the archive table when run as a notebook cell.
archive
# Retweet counts as a NumPy array, ordered from largest to smallest.
ass = np.flip(np.sort(archive["retweet_count"]), axis=0)
ass
# Summary statistics of the retweet counts.
retweet_mean = archive["retweet_count"].mean()
retweet_median = archive["retweet_count"].median()
retweet_max = archive["retweet_count"].max()
retweet_sum = archive["retweet_count"].sum()
# Histogram of the retweet counts, written to Docs/Viz.
archive["retweet_count"].hist(alpha=0.8, figsize=(8, 8), color="green")
plt.xlabel("Retweet Counts")
plt.ylabel("Count of Tweets")
plt.title("Re-Tweeted Tweets")
plt.savefig('Docs/Viz/2.png')
print("Mean Retweets Value is : {}".format(retweet_mean))
print("Median Retweets Value is : {}".format(retweet_median))
print("Max Retweets Value for an tweet is : {}".format(retweet_max))
print("Total Retweets secured for All Tweets : {}".format(retweet_sum))
# Dog Names from archive
# Pie chart of how often each value of the `dog` column occurs.
pie = archive.dog.value_counts()
pie.plot(kind="pie")
plt.savefig('Docs/Viz/3.png')

# value_counts() orders its rows from most to least frequent, so the four
# positions read below are the four most common values of the column.
# NOTE(review): the printed labels assume that order is pupper, doggo, puppo,
# floofer -- confirm against the data.
dog_val = archive.dog.value_counts()
# Positional access goes through .iloc: a plain integer key such as
# dog_val[0] on a label-indexed Series only works through a positional
# fallback that recent pandas versions deprecate.
name_sum = dog_val.iloc[:4].sum()
pupper_per = dog_val.iloc[0] / name_sum * 100
doggo_per = dog_val.iloc[1] / name_sum * 100
puppo_per = dog_val.iloc[2] / name_sum * 100
floofer_per = dog_val.iloc[3] / name_sum * 100
print("The Percentile Value of Pupper to all dogs is {}%".format(pupper_per))
print("The Percentile Value of Doggo to all dogs is {}%".format(doggo_per))
print("The Percentile Value of Puppo to all dogs is {}%".format(puppo_per))
print("The Percentile Value of Floofer to all dogs is {}%".format(floofer_per))
# Bare expression: total number of rows that carry a value in `dog`.
dog_val.sum()
# numerators & denominators from archive
num = archive["numerator"]
dom = archive["denominator"]
# Mean and median of both rating parts, plus the largest numerator.
num_mean, num_median = num.mean(), num.median()
num_max = archive["numerator"].max()
dom_mean, dom_median = dom.mean(), dom.median()
print("The mean value of all the numerator of the ratings given is : {}".format(num_mean))
print("The median value of all the numerator of the ratings given is : {}".format(num_median))
print("The mean value of all the denominators of the ratings given is : {}".format(dom_mean))
print("The median value of all the denominators of the ratings given is : {}".format(dom_median))
# Empty line separating the statistics from what follows.
print("")
# Histogram of the numerators; the returned axes are the ones labelled below.
num_plt = num.plot(kind='hist', figsize=(10, 10), color="#ff9960")
num_plt.set_ylabel("Tweets")
num_plt.set_xlabel("Numerator Values")
num_plt.set_title("Numerator Histogram")
plt.savefig('Docs/Viz/4.png')
print("The Maximum Rating Numerator given is {}".format(num_max))
# Parse the timestamp column once and derive the date, hour and year of each
# tweet from that single parsed Series.
timestamps = pd.to_datetime(archive.timestamp)
time_dt = timestamps.dt.date
time_hr = timestamps.dt.hour
time_yr = timestamps.dt.year

# Bare expression: number of tweets posted in each hour of the day.
time_hr.value_counts()
# One bin per hour (edges 0, 1, ..., 24) so every bar covers exactly one
# hour; the default of 10 bins would merge two or three hours per bar.
time_hr.plot(figsize=(8, 6), kind='hist', bins=np.arange(25))
plt.title("Hourly Posting Graph")
plt.savefig('Docs/Viz/5.png')

# From here on time_dt no longer holds dates: it is rebound to the number of
# tweets posted on each calendar date.
time_dt = time_dt.value_counts()
time_dt.plot(figsize=(15, 10))
plt.title("Daily Posting Graph")
plt.savefig('Docs/Viz/6.png')
# Bare expression: how many dates share each tweets-per-day figure.
time_dt.value_counts()

# value_counts() orders by frequency; sort by year so the bars read
# chronologically from left to right.
time_yr.value_counts().sort_index().plot(kind='bar')
plt.title("Yearly Graph Figure")
plt.savefig('Docs/Viz/7.png')
# img DataSet
# Preview the first five rows of the image-prediction table.
img.head(5)
# img
# Mean and median of the img_num column (images uploaded per post).
img_uploaded_mean = img["img_num"].mean()
img_uploaded_median = img["img_num"].median()
print("The Average amount of pictures uploaded per tweet is : {}".format(img_uploaded_mean))
print("The Median amount of the uploaded photos is : {}".format(img_uploaded_median))
# Neural Network Analysis
# p1_conf, p2_conf & p3_conf
# Mean of the confidence column of each of the three predictions.
Cp1_mean = img.p1_conf.mean()
Cp2_mean = img.p2_conf.mean()
Cp3_mean = img.p3_conf.mean()
print("Calculating the Efficiency of the Neural Network on the diffrent stages p1,p2,p3")
# Each line names its own stage; the P2 and P3 lines previously repeated
# "Stage one" from the P1 line.
print("The Average Efficiency of Stage one P1 {} ".format(Cp1_mean))
print("The Average Efficiency of Stage two P2 {} ".format(Cp2_mean))
print("The Average Efficiency of Stage three P3 {} ".format(Cp3_mean))
# Count the True / False values of each stage's *_dog flag column.
# NOTE(review): assumes the p*_dog columns were parsed as booleans -- confirm.
prediction_p1 = img.p1_dog.value_counts()
prediction_p2 = img.p2_dog.value_counts()
prediction_p3 = img.p3_dog.value_counts()
# Percentage of rows flagged True at each stage. The True count is looked up
# by label: value_counts() orders rows by frequency, so an integer key such
# as prediction_p1[1] may pick the False row whenever True is the more common
# value. .get() also yields 0 when a stage has no True rows at all.
p1_per = prediction_p1.get(True, 0) / prediction_p1.sum() * 100
p2_per = prediction_p2.get(True, 0) / prediction_p2.sum() * 100
p3_per = prediction_p3.get(True, 0) / prediction_p3.sum() * 100
# Report the percentages found above.
print("P1 Stage Success Hit Rate is {} %".format(p1_per))
print("P2 Stage Success Hit Rate is {} %".format(p2_per))
print("P3 Stage Success Hit Rate is {} %".format(p3_per))
# How often each predicted label occurs at each stage, most frequent first.
d_p1 = img["p1"].value_counts()
d_p2 = img["p2"].value_counts()
d_p3 = img["p3"].value_counts()
# Print the five most frequent labels of every stage for inspection.
print("Finding out the most popular Dogs for Each Stage \n")
print("The Top Popular Dogs for Stage P1 Are :\n{} \n".format(d_p1.head(5)))
print("The Top Popular Dogs for Stage P2 Are :\n{} \n".format(d_p2.head(5)))
print("The Top Popular Dogs for Stage P3 Are :\n{} \n".format(d_p3.head(5)))
# Stack the per-stage label counts into one Series, then add up the counts
# that share a label so each label has a single total across all stages.
# NOTE(review): the counts come from every p1/p2/p3 label, including rows
# whose *_dog flag may be False, so non-dog labels can appear in this
# ranking -- confirm whether they should be filtered out first.
all_dogs = pd.concat([d_p1, d_p2, d_p3])
# Use the groupby's own sum(): handing the builtin `sum` to aggregate() is
# deprecated in recent pandas versions.
d_all = all_dogs.groupby(all_dogs.index).sum()
d_all = d_all.sort_values(ascending=False)
print("The Top 10 Dogs overall through all the stages in our DataSet are \n\n{}\n".format(d_all.head(10)))