Data Wrangling(Assessing)

In [2]:
#Importing basic packages needed to get Data 
import pandas as pd
import requests
import os
import tweepy
import json
In [7]:
# Assigning agreed upon variable names
archive = pd.read_csv('data/twitter-archive-enhanced.csv')
img = pd.read_csv('data/image-predictions.tsv', sep='\t')
api = pd.read_csv('data/twitter_archive_api.csv')
In [3]:
archive.head()
Out[3]:
tweet_id in_reply_to_status_id in_reply_to_user_id timestamp source text retweeted_status_id retweeted_status_user_id retweeted_status_timestamp expanded_urls rating_numerator rating_denominator name doggo floofer pupper puppo
0 892420643555336193 NaN NaN 2017-08-01 16:23:56 +0000 <a href="http://twitter.com/download/iphone" r... This is Phineas. He's a mystical boy. Only eve... NaN NaN NaN https://twitter.com/dog_rates/status/892420643... 13 10 Phineas None None None None
1 892177421306343426 NaN NaN 2017-08-01 00:17:27 +0000 <a href="http://twitter.com/download/iphone" r... This is Tilly. She's just checking pup on you.... NaN NaN NaN https://twitter.com/dog_rates/status/892177421... 13 10 Tilly None None None None
2 891815181378084864 NaN NaN 2017-07-31 00:18:03 +0000 <a href="http://twitter.com/download/iphone" r... This is Archie. He is a rare Norwegian Pouncin... NaN NaN NaN https://twitter.com/dog_rates/status/891815181... 12 10 Archie None None None None
3 891689557279858688 NaN NaN 2017-07-30 15:58:51 +0000 <a href="http://twitter.com/download/iphone" r... This is Darla. She commenced a snooze mid meal... NaN NaN NaN https://twitter.com/dog_rates/status/891689557... 13 10 Darla None None None None
4 891327558926688256 NaN NaN 2017-07-29 16:00:24 +0000 <a href="http://twitter.com/download/iphone" r... This is Franklin. He would like you to stop ca... NaN NaN NaN https://twitter.com/dog_rates/status/891327558... 12 10 Franklin None None None None
In [4]:
val = archive.text[12]
val.split('.')[0].split(" ")[-1]
# print(val.split('/')[1])
val
Out[4]:
"Here's a puppo that seems to be on the fence about something haha no but seriously someone help her. 13/10 https://t.co/BxvuXk0UCm"
In [5]:
index = val.index('/')
print(val[index-2:index])
print(val[index+1:index+3])
13
10
In [6]:
#Code for finding numberator and denominator
index = val.index('/')
rating_numerator = val[index-2:index]
rating_denominator = val[index+1:index+3]
In [7]:
# index = val.index('This is')
# index
In [8]:
list(archive.columns.values)
Out[8]:
['tweet_id',
 'in_reply_to_status_id',
 'in_reply_to_user_id',
 'timestamp',
 'source',
 'text',
 'retweeted_status_id',
 'retweeted_status_user_id',
 'retweeted_status_timestamp',
 'expanded_urls',
 'rating_numerator',
 'rating_denominator',
 'name',
 'doggo',
 'floofer',
 'pupper',
 'puppo']
In [9]:
archive.count()
Out[9]:
tweet_id                      2356
in_reply_to_status_id           78
in_reply_to_user_id             78
timestamp                     2356
source                        2356
text                          2356
retweeted_status_id            181
retweeted_status_user_id       181
retweeted_status_timestamp     181
expanded_urls                 2297
rating_numerator              2356
rating_denominator            2356
name                          2356
doggo                         2356
floofer                       2356
pupper                        2356
puppo                         2356
dtype: int64
In [10]:
img.head(20)
Out[10]:
tweet_id jpg_url img_num p1 p1_conf p1_dog p2 p2_conf p2_dog p3 p3_conf p3_dog
0 666020888022790149 https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg 1 Welsh_springer_spaniel 0.465074 True collie 0.156665 True Shetland_sheepdog 0.061428 True
1 666029285002620928 https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg 1 redbone 0.506826 True miniature_pinscher 0.074192 True Rhodesian_ridgeback 0.072010 True
2 666033412701032449 https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg 1 German_shepherd 0.596461 True malinois 0.138584 True bloodhound 0.116197 True
3 666044226329800704 https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg 1 Rhodesian_ridgeback 0.408143 True redbone 0.360687 True miniature_pinscher 0.222752 True
4 666049248165822465 https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg 1 miniature_pinscher 0.560311 True Rottweiler 0.243682 True Doberman 0.154629 True
5 666050758794694657 https://pbs.twimg.com/media/CT5Jof1WUAEuVxN.jpg 1 Bernese_mountain_dog 0.651137 True English_springer 0.263788 True Greater_Swiss_Mountain_dog 0.016199 True
6 666051853826850816 https://pbs.twimg.com/media/CT5KoJ1WoAAJash.jpg 1 box_turtle 0.933012 False mud_turtle 0.045885 False terrapin 0.017885 False
7 666055525042405380 https://pbs.twimg.com/media/CT5N9tpXIAAifs1.jpg 1 chow 0.692517 True Tibetan_mastiff 0.058279 True fur_coat 0.054449 False
8 666057090499244032 https://pbs.twimg.com/media/CT5PY90WoAAQGLo.jpg 1 shopping_cart 0.962465 False shopping_basket 0.014594 False golden_retriever 0.007959 True
9 666058600524156928 https://pbs.twimg.com/media/CT5Qw94XAAA_2dP.jpg 1 miniature_poodle 0.201493 True komondor 0.192305 True soft-coated_wheaten_terrier 0.082086 True
10 666063827256086533 https://pbs.twimg.com/media/CT5Vg_wXIAAXfnj.jpg 1 golden_retriever 0.775930 True Tibetan_mastiff 0.093718 True Labrador_retriever 0.072427 True
11 666071193221509120 https://pbs.twimg.com/media/CT5cN_3WEAAlOoZ.jpg 1 Gordon_setter 0.503672 True Yorkshire_terrier 0.174201 True Pekinese 0.109454 True
12 666073100786774016 https://pbs.twimg.com/media/CT5d9DZXAAALcwe.jpg 1 Walker_hound 0.260857 True English_foxhound 0.175382 True Ibizan_hound 0.097471 True
13 666082916733198337 https://pbs.twimg.com/media/CT5m4VGWEAAtKc8.jpg 1 pug 0.489814 True bull_mastiff 0.404722 True French_bulldog 0.048960 True
14 666094000022159362 https://pbs.twimg.com/media/CT5w9gUW4AAsBNN.jpg 1 bloodhound 0.195217 True German_shepherd 0.078260 True malinois 0.075628 True
15 666099513787052032 https://pbs.twimg.com/media/CT51-JJUEAA6hV8.jpg 1 Lhasa 0.582330 True Shih-Tzu 0.166192 True Dandie_Dinmont 0.089688 True
16 666102155909144576 https://pbs.twimg.com/media/CT54YGiWUAEZnoK.jpg 1 English_setter 0.298617 True Newfoundland 0.149842 True borzoi 0.133649 True
17 666104133288665088 https://pbs.twimg.com/media/CT56LSZWoAAlJj2.jpg 1 hen 0.965932 False cock 0.033919 False partridge 0.000052 False
18 666268910803644416 https://pbs.twimg.com/media/CT8QCd1WEAADXws.jpg 1 desktop_computer 0.086502 False desk 0.085547 False bookcase 0.079480 False
19 666273097616637952 https://pbs.twimg.com/media/CT8T1mtUwAA3aqm.jpg 1 Italian_greyhound 0.176053 True toy_terrier 0.111884 True basenji 0.111152 True
In [11]:
img.sample(50)
Out[11]:
tweet_id jpg_url img_num p1 p1_conf p1_dog p2 p2_conf p2_dog p3 p3_conf p3_dog
1982 871762521631449091 https://pbs.twimg.com/media/DBkfY58XcAEdzZy.jpg 2 Labrador_retriever 0.921393 True golden_retriever 0.064608 True bloodhound 0.003383 True
1739 822462944365645825 https://pbs.twimg.com/media/C2n5rUUXEAIXAtv.jpg 3 Pomeranian 0.960199 True Samoyed 0.023056 True Maltese_dog 0.008945 True
764 688916208532455424 https://pbs.twimg.com/media/CY-Fn1FWEAQhzhs.jpg 1 Pembroke 0.430544 True red_fox 0.206576 False Pomeranian 0.154352 True
1277 750086836815486976 https://pbs.twimg.com/media/Cmf5WLGWYAAcmRw.jpg 1 pug 0.978277 True teddy 0.003134 False Brabancon_griffon 0.003061 True
468 675109292475830276 https://pbs.twimg.com/media/CV54UQTXAAAGf-j.jpg 1 dalmatian 0.989519 True English_setter 0.005258 True German_short-haired_pointer 0.001443 True
123 668226093875376128 https://pbs.twimg.com/media/CUYEFlQXAAUkPGm.jpg 1 trombone 0.390339 False cornet 0.314149 False French_horn 0.255182 False
198 669625907762618368 https://pbs.twimg.com/media/CUr9NjgU8AEpf5w.jpg 1 seat_belt 0.874502 False golden_retriever 0.055408 True Labrador_retriever 0.026854 True
854 696488710901260288 https://pbs.twimg.com/media/CapsyfkWcAQ41uC.jpg 1 briard 0.369063 True Scotch_terrier 0.168204 True giant_schnauzer 0.120553 True
1132 728387165835677696 https://pbs.twimg.com/media/ChvAQuMWMAAVaKD.jpg 1 collie 0.266414 True Great_Pyrenees 0.138546 True keeshond 0.109014 True
471 675135153782571009 https://pbs.twimg.com/media/CV6P1lnWIAAUQHk.jpg 1 stove 0.587507 False rotisserie 0.051713 False microwave 0.020725 False
657 682303737705140231 https://pbs.twimg.com/media/CXgHoLnWAAA8i52.jpg 1 seat_belt 0.997659 False Lakeland_terrier 0.001731 True Airedale 0.000204 True
271 670823764196741120 https://pbs.twimg.com/media/CU8-puBWwAAR8Xl.jpg 1 Labrador_retriever 0.947453 True German_short-haired_pointer 0.017001 True Weimaraner 0.015432 True
1233 746131877086527488 https://pbs.twimg.com/media/ClrK-rGWAAENcAa.jpg 1 chow 0.575637 True Pomeranian 0.195950 True Norwich_terrier 0.141224 True
1865 843235543001513987 https://pbs.twimg.com/media/C7PGQJAWwAAibui.jpg 1 Pembroke 0.958452 True Cardigan 0.023770 True Chihuahua 0.005269 True
2032 883360690899218434 https://pbs.twimg.com/media/DEJT3FeXoAAtwUy.jpg 1 chow 0.987997 True Tibetan_mastiff 0.007099 True Newfoundland 0.002140 True
699 684594889858887680 https://pbs.twimg.com/media/CYAra7JWsAACPZH.jpg 1 Weimaraner 0.948688 True English_setter 0.035352 True Brittany_spaniel 0.003879 True
1910 853299958564483072 https://pbs.twimg.com/media/C9eHyF7XgAAOxPM.jpg 1 grille 0.652280 False beach_wagon 0.112846 False convertible 0.086252 False
84 667502640335572993 https://pbs.twimg.com/media/CUNyHTMUYAAQVch.jpg 1 Labrador_retriever 0.996709 True golden_retriever 0.001688 True beagle 0.000712 True
988 707776935007539200 https://pbs.twimg.com/media/CdKHWimWoAABs08.jpg 1 miniature_pinscher 0.890426 True toy_terrier 0.051335 True Chihuahua 0.018015 True
1624 803692223237865472 https://pbs.twimg.com/media/CZhn-QAWwAASQan.jpg 1 Lakeland_terrier 0.530104 True Irish_terrier 0.197314 True Airedale 0.082515 True
1287 751132876104687617 https://pbs.twimg.com/media/CmyPXNOW8AEtaJ-.jpg 1 Labrador_retriever 0.929390 True Chesapeake_Bay_retriever 0.038254 True golden_retriever 0.007610 True
578 678755239630127104 https://pbs.twimg.com/media/CWtsSQAUkAAnWws.jpg 1 malamute 0.606654 True Border_collie 0.193831 True collie 0.048378 True
1251 747933425676525569 https://pbs.twimg.com/media/CmExV2qWkAAn_pN.jpg 1 Samoyed 0.998201 True Eskimo_dog 0.000793 True Great_Pyrenees 0.000296 True
564 678255464182861824 https://pbs.twimg.com/media/CWmlvxJU4AEAqaN.jpg 1 Chihuahua 0.613819 True Yorkshire_terrier 0.127931 True Pomeranian 0.062124 True
1314 754856583969079297 https://pbs.twimg.com/media/CnnKCKNWgAAcOB8.jpg 2 golden_retriever 0.872385 True Labrador_retriever 0.099963 True cocker_spaniel 0.006051 True
141 668567822092664832 https://pbs.twimg.com/media/CUc64knWoAkZt70.jpg 1 Shih-Tzu 0.985649 True Lhasa 0.007078 True Pekinese 0.003053 True
2024 881666595344535552 https://pbs.twimg.com/media/DDxPFwbWAAEbVVR.jpg 1 Saluki 0.529012 True Afghan_hound 0.250003 True golden_retriever 0.160739 True
1383 765669560888528897 https://pbs.twimg.com/media/CqA0XcYWAAAzltT.jpg 1 beagle 0.993333 True Walker_hound 0.002902 True basset 0.002415 True
625 680798457301471234 https://pbs.twimg.com/media/CXKuiyHUEAAMAGa.jpg 1 ram 0.499761 False hog 0.283795 False ox 0.067455 False
1632 805826884734976000 https://pbs.twimg.com/ext_tw_video_thumb/80582... 1 Siberian_husky 0.248926 True American_Staffordshire_terrier 0.098313 True Eskimo_dog 0.080188 True
859 696894894812565505 https://pbs.twimg.com/media/CaveNQcVIAECyBr.jpg 1 Appenzeller 0.665628 True beagle 0.104795 True Greater_Swiss_Mountain_dog 0.067868 True
419 674038233588723717 https://pbs.twimg.com/media/CVqqMtiVEAEye_L.jpg 1 Eskimo_dog 0.358459 True Norwegian_elkhound 0.206963 True malamute 0.148236 True
39 666649482315059201 https://pbs.twimg.com/media/CUBqKnLWwAA5OQB.jpg 1 Border_collie 0.447803 True English_springer 0.170497 True collie 0.139206 True
497 675798442703122432 https://pbs.twimg.com/media/CWDrGH4UYAARoq_.jpg 1 beagle 0.681218 True basset 0.125121 True boxer 0.080398 True
1499 783821107061198850 https://pbs.twimg.com/media/CuCxIzyWEAQTnQA.jpg 1 Lakeland_terrier 0.265659 True golden_retriever 0.196414 True standard_poodle 0.133534 True
320 671789708968640512 https://pbs.twimg.com/tweet_video_thumb/CVKtH-... 1 dalmatian 0.114259 True teddy 0.062275 False steam_locomotive 0.049700 False
1710 818259473185828864 https://pbs.twimg.com/media/C1sKo_QUkAALtkw.jpg 1 miniature_schnauzer 0.367368 True toy_poodle 0.112479 True standard_schnauzer 0.095434 True
226 670361874861563904 https://pbs.twimg.com/media/CU2akCQWsAIbaOV.jpg 1 platypus 0.974075 False spotted_salamander 0.011068 False bison 0.003897 False
77 667437278097252352 https://pbs.twimg.com/media/CUM2qWaWoAUZ06L.jpg 1 porcupine 0.989154 False bath_towel 0.006300 False badger 0.000966 False
668 683030066213818368 https://pbs.twimg.com/media/CXqcOHCUQAAugTB.jpg 1 boxer 0.722218 True bull_mastiff 0.193804 True French_bulldog 0.055194 True
983 707411934438625280 https://pbs.twimg.com/media/CdE7ZktXIAEiWLj.jpg 1 Lakeland_terrier 0.738277 True Airedale 0.028515 True giant_schnauzer 0.024876 True
1846 839239871831150596 https://pbs.twimg.com/media/C6WUNadWYAAPxHv.jpg 3 Leonberg 0.927021 True Newfoundland 0.050009 True Saint_Bernard 0.010728 True
562 677918531514703872 https://pbs.twimg.com/media/CWhzTbzWUAAEAUN.jpg 1 Eskimo_dog 0.199347 True dalmatian 0.153225 True American_Staffordshire_terrier 0.107798 True
505 675891555769696257 https://pbs.twimg.com/media/CWE_x33UwAEE3no.jpg 1 Italian_greyhound 0.305637 True whippet 0.232057 True Great_Dane 0.117806 True
1913 854010172552949760 https://pbs.twimg.com/media/C9oNt91WAAAFSLS.jpg 1 English_springer 0.354733 True collie 0.177538 True Border_collie 0.131706 True
606 679877062409191424 https://pbs.twimg.com/media/CW9olDsUsAA0XSf.jpg 1 hog 0.809466 False hay 0.060178 False lumbermill 0.016483 False
192 669567591774625800 https://pbs.twimg.com/media/CUrIK1DWoAAhECq.jpg 1 Chihuahua 0.980511 True toy_terrier 0.009166 True miniature_pinscher 0.002659 True
1606 800459316964663297 https://pbs.twimg.com/media/CxvNfrhWQAA2hKM.jpg 1 teddy 0.311928 False ice_bear 0.184657 False Christmas_stocking 0.173229 False
1919 855851453814013952 https://pbs.twimg.com/media/C-CYWrvWAAU8AXH.jpg 1 flat-coated_retriever 0.321676 True Labrador_retriever 0.115138 True groenendael 0.096100 True
72 667211855547486208 https://pbs.twimg.com/media/CUJppKJWoAA75NP.jpg 1 golden_retriever 0.462556 True Labrador_retriever 0.454937 True kuvasz 0.024193 True
In [12]:
list(img.columns.values)
Out[12]:
['tweet_id',
 'jpg_url',
 'img_num',
 'p1',
 'p1_conf',
 'p1_dog',
 'p2',
 'p2_conf',
 'p2_dog',
 'p3',
 'p3_conf',
 'p3_dog']
In [8]:
api.head(30)
Out[8]:
Unnamed: 0 tweet_id full_text fav_count retweet_count name dog numerator denominator
0 0 892420643555336193 This is Phineas. He's a mystical boy. Only eve... 38625 8541 Phineas NaN 13 10
1 1 892177421306343426 This is Tilly. She's just checking pup on you.... 33105 6282 Tilly NaN 13 10
2 2 891815181378084864 This is Archie. He is a rare Norwegian Pouncin... 24922 4161 Archie NaN 12 10
3 3 891689557279858688 This is Darla. She commenced a snooze mid meal... 42022 8670 Darla NaN 13 10
4 4 891327558926688256 This is Franklin. He would like you to stop ca... 40172 9422 Franklin NaN 12 10
5 5 891087950875897856 Here we have a majestic great white breaching ... 20140 3118 coast NaN 13 10
6 6 890971913173991426 Meet Jax. He enjoys ice cream so much he gets ... 11807 2076 Jax NaN 13 10
7 7 890729181411237888 When you watch your owner call another dog a g... 65260 18929 boy NaN 13 10
8 8 890609185150312448 This is Zoey. She doesn't want to be one of th... 27682 4275 Zoey NaN 13 10
9 9 890240255349198849 This is Cassie. She is a college pup. Studying... 31823 7434 Cassie doggo 14 10
10 10 890006608113172480 This is Koda. He is a South Australian decksha... 30563 7353 Koda NaN 13 10
11 11 889880896479866881 This is Bruno. He is a service shark. Only get... 27679 4980 Bruno NaN 13 10
12 12 889665388333682689 Here's a puppo that seems to be on the fence a... 47935 10083 her puppo 13 10
13 13 889638837579907072 This is Ted. He does his best. Sometimes that'... 27070 4551 Ted NaN 12 10
14 14 889531135344209921 This is Stuart. He's sporting his favorite fan... 15040 2241 Stuart puppo 13 10
15 15 889278841981685760 This is Oliver. You're witnessing one of his m... 25190 5430 Oliver NaN 13 10
16 16 888917238123831296 This is Jim. He found a fren. Taught him how t... 28972 4508 Jim NaN 12 10
17 17 888804989199671297 This is Zeke. He has a new stick. Very proud o... 25482 4353 Zeke NaN 13 10
18 18 888554962724278272 This is Ralphus. He's powering up. Attempting ... 19824 3588 Ralphus NaN 13 10
19 19 888078434458587136 This is Gerald. He was just told he didn't get... 21677 3500 Gerald NaN 12 10
20 20 887705289381826560 This is Jeffrey. He has a monopoly on the pool... 30067 5405 Jeffrey NaN 13 10
21 21 887517139158093824 I've yet to rate a Venezuelan Hover Wiener. Th... 46065 11693 Wiener NaN 14 10
22 22 887473957103951883 This is Canela. She attempted some fancy porch... 68851 18259 Canela NaN 13 10
23 23 887343217045368832 You may not have known you needed to see this ... 33548 10422 today NaN 13 10
24 24 887101392804085760 This... is a Jubilant Antarctic House Bear. We... 30425 5975 This NaN 12 10
25 25 886983233522544640 This is Maya. She's very shy. Rarely leaves he... 35026 7791 Maya NaN 13 10
26 26 886736880519319552 This is Mingus. He's a wonderful father to his... 12015 3302 Mingus NaN 13 10
27 27 886680336477933568 This is Derek. He's late for a dog meeting. 13... 22325 4477 Derek NaN 13 10
28 28 886366144734445568 This is Roscoe. Another pupper fallen victim t... 21112 3203 Roscoe pupper 12 10
29 29 886267009285017600 @NonWhiteHat @MayhewMayhem omg hello tanner yo... 116 4 caution NaN 12 10
In [9]:
#Listing All Colums Before dropping
list(api.columns.values)
Out[9]:
['Unnamed: 0',
 'tweet_id',
 'full_text',
 'fav_count',
 'retweet_count',
 'name',
 'dog',
 'numerator',
 'denominator']
In [10]:
#Dropping First Column which has Just Numbers
api.drop(columns=['Unnamed: 0'], inplace=True)
#Listing All Colums Before dropping
list(api.columns.values)
Out[10]:
['tweet_id',
 'full_text',
 'fav_count',
 'retweet_count',
 'name',
 'dog',
 'numerator',
 'denominator']
In [11]:
api.head(20)
Out[11]:
tweet_id full_text fav_count retweet_count name dog numerator denominator
0 892420643555336193 This is Phineas. He's a mystical boy. Only eve... 38625 8541 Phineas NaN 13 10
1 892177421306343426 This is Tilly. She's just checking pup on you.... 33105 6282 Tilly NaN 13 10
2 891815181378084864 This is Archie. He is a rare Norwegian Pouncin... 24922 4161 Archie NaN 12 10
3 891689557279858688 This is Darla. She commenced a snooze mid meal... 42022 8670 Darla NaN 13 10
4 891327558926688256 This is Franklin. He would like you to stop ca... 40172 9422 Franklin NaN 12 10
5 891087950875897856 Here we have a majestic great white breaching ... 20140 3118 coast NaN 13 10
6 890971913173991426 Meet Jax. He enjoys ice cream so much he gets ... 11807 2076 Jax NaN 13 10
7 890729181411237888 When you watch your owner call another dog a g... 65260 18929 boy NaN 13 10
8 890609185150312448 This is Zoey. She doesn't want to be one of th... 27682 4275 Zoey NaN 13 10
9 890240255349198849 This is Cassie. She is a college pup. Studying... 31823 7434 Cassie doggo 14 10
10 890006608113172480 This is Koda. He is a South Australian decksha... 30563 7353 Koda NaN 13 10
11 889880896479866881 This is Bruno. He is a service shark. Only get... 27679 4980 Bruno NaN 13 10
12 889665388333682689 Here's a puppo that seems to be on the fence a... 47935 10083 her puppo 13 10
13 889638837579907072 This is Ted. He does his best. Sometimes that'... 27070 4551 Ted NaN 12 10
14 889531135344209921 This is Stuart. He's sporting his favorite fan... 15040 2241 Stuart puppo 13 10
15 889278841981685760 This is Oliver. You're witnessing one of his m... 25190 5430 Oliver NaN 13 10
16 888917238123831296 This is Jim. He found a fren. Taught him how t... 28972 4508 Jim NaN 12 10
17 888804989199671297 This is Zeke. He has a new stick. Very proud o... 25482 4353 Zeke NaN 13 10
18 888554962724278272 This is Ralphus. He's powering up. Attempting ... 19824 3588 Ralphus NaN 13 10
19 888078434458587136 This is Gerald. He was just told he didn't get... 21677 3500 Gerald NaN 12 10

Programatic Assessment

In [17]:
archive.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2356 entries, 0 to 2355
Data columns (total 17 columns):
tweet_id                      2356 non-null int64
in_reply_to_status_id         78 non-null float64
in_reply_to_user_id           78 non-null float64
timestamp                     2356 non-null object
source                        2356 non-null object
text                          2356 non-null object
retweeted_status_id           181 non-null float64
retweeted_status_user_id      181 non-null float64
retweeted_status_timestamp    181 non-null object
expanded_urls                 2297 non-null object
rating_numerator              2356 non-null int64
rating_denominator            2356 non-null int64
name                          2356 non-null object
doggo                         2356 non-null object
floofer                       2356 non-null object
pupper                        2356 non-null object
puppo                         2356 non-null object
dtypes: float64(4), int64(3), object(10)
memory usage: 313.0+ KB
In [18]:
archive.describe()
Out[18]:
tweet_id in_reply_to_status_id in_reply_to_user_id retweeted_status_id retweeted_status_user_id rating_numerator rating_denominator
count 2.356000e+03 7.800000e+01 7.800000e+01 1.810000e+02 1.810000e+02 2356.000000 2356.000000
mean 7.427716e+17 7.455079e+17 2.014171e+16 7.720400e+17 1.241698e+16 13.126486 10.455433
std 6.856705e+16 7.582492e+16 1.252797e+17 6.236928e+16 9.599254e+16 45.876648 6.745237
min 6.660209e+17 6.658147e+17 1.185634e+07 6.661041e+17 7.832140e+05 0.000000 0.000000
25% 6.783989e+17 6.757419e+17 3.086374e+08 7.186315e+17 4.196984e+09 10.000000 10.000000
50% 7.196279e+17 7.038708e+17 4.196984e+09 7.804657e+17 4.196984e+09 11.000000 10.000000
75% 7.993373e+17 8.257804e+17 4.196984e+09 8.203146e+17 4.196984e+09 12.000000 10.000000
max 8.924206e+17 8.862664e+17 8.405479e+17 8.874740e+17 7.874618e+17 1776.000000 170.000000
In [19]:
api.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2344 entries, 0 to 2343
Data columns (total 4 columns):
Tid              2344 non-null int64
fav_count        2344 non-null int64
full_text        2344 non-null object
retweet_count    2344 non-null int64
dtypes: int64(3), object(1)
memory usage: 73.3+ KB
In [20]:
img.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2075 entries, 0 to 2074
Data columns (total 12 columns):
tweet_id    2075 non-null int64
jpg_url     2075 non-null object
img_num     2075 non-null int64
p1          2075 non-null object
p1_conf     2075 non-null float64
p1_dog      2075 non-null bool
p2          2075 non-null object
p2_conf     2075 non-null float64
p2_dog      2075 non-null bool
p3          2075 non-null object
p3_conf     2075 non-null float64
p3_dog      2075 non-null bool
dtypes: bool(3), float64(3), int64(2), object(4)
memory usage: 152.1+ KB
In [21]:
api.head()
Out[21]:
Tid fav_count full_text retweet_count
0 892420643555336193 38625 This is Phineas. He's a mystical boy. Only eve... 8541
1 892177421306343426 33105 This is Tilly. She's just checking pup on you.... 6282
2 891815181378084864 24922 This is Archie. He is a rare Norwegian Pouncin... 4161
3 891689557279858688 42022 This is Darla. She commenced a snooze mid meal... 8670
4 891327558926688256 40172 This is Franklin. He would like you to stop ca... 9422
In [22]:
# archive.subset(retweeted_status_id != 'NaN')
archive(archive['retweeted_status_id'] != None)
patients(patients['city'] == 'New York')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-22-d1dcf400d198> in <module>()
      1 # archive.subset(retweeted_status_id != 'NaN')
----> 2 archive(archive['retweeted_status_id'] != None)
      3 patients(patients['city'] == 'New York')

TypeError: 'DataFrame' object is not callable
In [26]:
# CHecking for Duplicates throughout the data sets


all_colums = pd.Series(list(archive) + list(api) + list(img))
all_colums[all_colums.duplicated()]
Out[26]:
21    tweet_id
dtype: object

Detect and document at least eight (8) quality issues and two (2) tidiness issues in your wrangle_act.ipynb Jupyter Notebook.

In [12]:
archive.duplicated(['tweet_id']).sum()
Out[12]:
0
In [13]:
img.duplicated(['tweet_id']).sum()
Out[13]:
0
In [ ]:
archive.