#Importing basic packages needed to get Data
import pandas as pd
import requests
import os
import tweepy
import json
# Load the three source tables under the agreed-upon variable names:
#   archive - contents of twitter-archive-enhanced.csv (comma-separated)
#   img     - contents of image-predictions.tsv (tab-separated)
#   api     - contents of twitter_archive_api.csv (comma-separated)
archive = pd.read_csv('data/twitter-archive-enhanced.csv')
img = pd.read_csv('data/image-predictions.tsv', sep='\t')
api = pd.read_csv('data/twitter_archive_api.csv')
# Preview the first rows of the archive table
archive.head()
tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 892420643555336193 | NaN | NaN | 2017-08-01 16:23:56 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Phineas. He's a mystical boy. Only eve... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/892420643... | 13 | 10 | Phineas | None | None | None | None |
1 | 892177421306343426 | NaN | NaN | 2017-08-01 00:17:27 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Tilly. She's just checking pup on you.... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/892177421... | 13 | 10 | Tilly | None | None | None | None |
2 | 891815181378084864 | NaN | NaN | 2017-07-31 00:18:03 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Archie. He is a rare Norwegian Pouncin... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891815181... | 12 | 10 | Archie | None | None | None | None |
3 | 891689557279858688 | NaN | NaN | 2017-07-30 15:58:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Darla. She commenced a snooze mid meal... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891689557... | 13 | 10 | Darla | None | None | None | None |
4 | 891327558926688256 | NaN | NaN | 2017-07-29 16:00:24 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Franklin. He would like you to stop ca... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891327558... | 12 | 10 | Franklin | None | None | None | None |
# Take the text of the archive row labelled 12 as a sample to experiment on
val = archive.text[12]
# Last space-separated word of the text before the first '.'; the result is
# not assigned to anything
val.split('.')[0].split(" ")[-1]
# print(val.split('/')[1])
# Echo the full sample text
val
"Here's a puppo that seems to be on the fence about something haha no but seriously someone help her. 13/10 https://t.co/BxvuXk0UCm"
# Position of the first '/' in the sample text (raises ValueError if absent)
index = val.index('/')
# Print the two characters before the slash, then the two characters after it
# NOTE(review): this fixed two-character window only fits ratings such as
# "13/10" where both parts have exactly two digits
print(val[index-2:index])
print(val[index+1:index+3])
13 10
# Code for finding the rating numerator and denominator in the sample text.
# A regular expression replaces the fixed two-character slices around the
# first '/': it captures the whole number on each side of the slash (the
# numerator may carry a decimal part), so parts that are not exactly two
# digits long are not truncated or padded with neighbouring characters, and a
# '/' that has no digits on both sides (for example inside a URL) is skipped.
import re

rating_match = re.search(r'(\d+(?:\.\d+)?)/(\d+)', val)
if rating_match is None:
    # Same exception type that val.index('/') raised when no slash was found
    raise ValueError('no rating of the form <number>/<number> found in: ' + val)
# Position of the '/' inside the matched rating (it sits directly before group 2)
index = rating_match.start(2) - 1
# Both parts are kept as strings, exactly as the slices produced them
rating_numerator = rating_match.group(1)
rating_denominator = rating_match.group(2)
# index = val.index('This is')
# index
list(archive.columns.values)
['tweet_id', 'in_reply_to_status_id', 'in_reply_to_user_id', 'timestamp', 'source', 'text', 'retweeted_status_id', 'retweeted_status_user_id', 'retweeted_status_timestamp', 'expanded_urls', 'rating_numerator', 'rating_denominator', 'name', 'doggo', 'floofer', 'pupper', 'puppo']
archive.count()
tweet_id 2356 in_reply_to_status_id 78 in_reply_to_user_id 78 timestamp 2356 source 2356 text 2356 retweeted_status_id 181 retweeted_status_user_id 181 retweeted_status_timestamp 181 expanded_urls 2297 rating_numerator 2356 rating_denominator 2356 name 2356 doggo 2356 floofer 2356 pupper 2356 puppo 2356 dtype: int64
img.head(20)
tweet_id | jpg_url | img_num | p1 | p1_conf | p1_dog | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 666020888022790149 | https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg | 1 | Welsh_springer_spaniel | 0.465074 | True | collie | 0.156665 | True | Shetland_sheepdog | 0.061428 | True |
1 | 666029285002620928 | https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg | 1 | redbone | 0.506826 | True | miniature_pinscher | 0.074192 | True | Rhodesian_ridgeback | 0.072010 | True |
2 | 666033412701032449 | https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg | 1 | German_shepherd | 0.596461 | True | malinois | 0.138584 | True | bloodhound | 0.116197 | True |
3 | 666044226329800704 | https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg | 1 | Rhodesian_ridgeback | 0.408143 | True | redbone | 0.360687 | True | miniature_pinscher | 0.222752 | True |
4 | 666049248165822465 | https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg | 1 | miniature_pinscher | 0.560311 | True | Rottweiler | 0.243682 | True | Doberman | 0.154629 | True |
5 | 666050758794694657 | https://pbs.twimg.com/media/CT5Jof1WUAEuVxN.jpg | 1 | Bernese_mountain_dog | 0.651137 | True | English_springer | 0.263788 | True | Greater_Swiss_Mountain_dog | 0.016199 | True |
6 | 666051853826850816 | https://pbs.twimg.com/media/CT5KoJ1WoAAJash.jpg | 1 | box_turtle | 0.933012 | False | mud_turtle | 0.045885 | False | terrapin | 0.017885 | False |
7 | 666055525042405380 | https://pbs.twimg.com/media/CT5N9tpXIAAifs1.jpg | 1 | chow | 0.692517 | True | Tibetan_mastiff | 0.058279 | True | fur_coat | 0.054449 | False |
8 | 666057090499244032 | https://pbs.twimg.com/media/CT5PY90WoAAQGLo.jpg | 1 | shopping_cart | 0.962465 | False | shopping_basket | 0.014594 | False | golden_retriever | 0.007959 | True |
9 | 666058600524156928 | https://pbs.twimg.com/media/CT5Qw94XAAA_2dP.jpg | 1 | miniature_poodle | 0.201493 | True | komondor | 0.192305 | True | soft-coated_wheaten_terrier | 0.082086 | True |
10 | 666063827256086533 | https://pbs.twimg.com/media/CT5Vg_wXIAAXfnj.jpg | 1 | golden_retriever | 0.775930 | True | Tibetan_mastiff | 0.093718 | True | Labrador_retriever | 0.072427 | True |
11 | 666071193221509120 | https://pbs.twimg.com/media/CT5cN_3WEAAlOoZ.jpg | 1 | Gordon_setter | 0.503672 | True | Yorkshire_terrier | 0.174201 | True | Pekinese | 0.109454 | True |
12 | 666073100786774016 | https://pbs.twimg.com/media/CT5d9DZXAAALcwe.jpg | 1 | Walker_hound | 0.260857 | True | English_foxhound | 0.175382 | True | Ibizan_hound | 0.097471 | True |
13 | 666082916733198337 | https://pbs.twimg.com/media/CT5m4VGWEAAtKc8.jpg | 1 | pug | 0.489814 | True | bull_mastiff | 0.404722 | True | French_bulldog | 0.048960 | True |
14 | 666094000022159362 | https://pbs.twimg.com/media/CT5w9gUW4AAsBNN.jpg | 1 | bloodhound | 0.195217 | True | German_shepherd | 0.078260 | True | malinois | 0.075628 | True |
15 | 666099513787052032 | https://pbs.twimg.com/media/CT51-JJUEAA6hV8.jpg | 1 | Lhasa | 0.582330 | True | Shih-Tzu | 0.166192 | True | Dandie_Dinmont | 0.089688 | True |
16 | 666102155909144576 | https://pbs.twimg.com/media/CT54YGiWUAEZnoK.jpg | 1 | English_setter | 0.298617 | True | Newfoundland | 0.149842 | True | borzoi | 0.133649 | True |
17 | 666104133288665088 | https://pbs.twimg.com/media/CT56LSZWoAAlJj2.jpg | 1 | hen | 0.965932 | False | cock | 0.033919 | False | partridge | 0.000052 | False |
18 | 666268910803644416 | https://pbs.twimg.com/media/CT8QCd1WEAADXws.jpg | 1 | desktop_computer | 0.086502 | False | desk | 0.085547 | False | bookcase | 0.079480 | False |
19 | 666273097616637952 | https://pbs.twimg.com/media/CT8T1mtUwAA3aqm.jpg | 1 | Italian_greyhound | 0.176053 | True | toy_terrier | 0.111884 | True | basenji | 0.111152 | True |
img.sample(50)
tweet_id | jpg_url | img_num | p1 | p1_conf | p1_dog | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1982 | 871762521631449091 | https://pbs.twimg.com/media/DBkfY58XcAEdzZy.jpg | 2 | Labrador_retriever | 0.921393 | True | golden_retriever | 0.064608 | True | bloodhound | 0.003383 | True |
1739 | 822462944365645825 | https://pbs.twimg.com/media/C2n5rUUXEAIXAtv.jpg | 3 | Pomeranian | 0.960199 | True | Samoyed | 0.023056 | True | Maltese_dog | 0.008945 | True |
764 | 688916208532455424 | https://pbs.twimg.com/media/CY-Fn1FWEAQhzhs.jpg | 1 | Pembroke | 0.430544 | True | red_fox | 0.206576 | False | Pomeranian | 0.154352 | True |
1277 | 750086836815486976 | https://pbs.twimg.com/media/Cmf5WLGWYAAcmRw.jpg | 1 | pug | 0.978277 | True | teddy | 0.003134 | False | Brabancon_griffon | 0.003061 | True |
468 | 675109292475830276 | https://pbs.twimg.com/media/CV54UQTXAAAGf-j.jpg | 1 | dalmatian | 0.989519 | True | English_setter | 0.005258 | True | German_short-haired_pointer | 0.001443 | True |
123 | 668226093875376128 | https://pbs.twimg.com/media/CUYEFlQXAAUkPGm.jpg | 1 | trombone | 0.390339 | False | cornet | 0.314149 | False | French_horn | 0.255182 | False |
198 | 669625907762618368 | https://pbs.twimg.com/media/CUr9NjgU8AEpf5w.jpg | 1 | seat_belt | 0.874502 | False | golden_retriever | 0.055408 | True | Labrador_retriever | 0.026854 | True |
854 | 696488710901260288 | https://pbs.twimg.com/media/CapsyfkWcAQ41uC.jpg | 1 | briard | 0.369063 | True | Scotch_terrier | 0.168204 | True | giant_schnauzer | 0.120553 | True |
1132 | 728387165835677696 | https://pbs.twimg.com/media/ChvAQuMWMAAVaKD.jpg | 1 | collie | 0.266414 | True | Great_Pyrenees | 0.138546 | True | keeshond | 0.109014 | True |
471 | 675135153782571009 | https://pbs.twimg.com/media/CV6P1lnWIAAUQHk.jpg | 1 | stove | 0.587507 | False | rotisserie | 0.051713 | False | microwave | 0.020725 | False |
657 | 682303737705140231 | https://pbs.twimg.com/media/CXgHoLnWAAA8i52.jpg | 1 | seat_belt | 0.997659 | False | Lakeland_terrier | 0.001731 | True | Airedale | 0.000204 | True |
271 | 670823764196741120 | https://pbs.twimg.com/media/CU8-puBWwAAR8Xl.jpg | 1 | Labrador_retriever | 0.947453 | True | German_short-haired_pointer | 0.017001 | True | Weimaraner | 0.015432 | True |
1233 | 746131877086527488 | https://pbs.twimg.com/media/ClrK-rGWAAENcAa.jpg | 1 | chow | 0.575637 | True | Pomeranian | 0.195950 | True | Norwich_terrier | 0.141224 | True |
1865 | 843235543001513987 | https://pbs.twimg.com/media/C7PGQJAWwAAibui.jpg | 1 | Pembroke | 0.958452 | True | Cardigan | 0.023770 | True | Chihuahua | 0.005269 | True |
2032 | 883360690899218434 | https://pbs.twimg.com/media/DEJT3FeXoAAtwUy.jpg | 1 | chow | 0.987997 | True | Tibetan_mastiff | 0.007099 | True | Newfoundland | 0.002140 | True |
699 | 684594889858887680 | https://pbs.twimg.com/media/CYAra7JWsAACPZH.jpg | 1 | Weimaraner | 0.948688 | True | English_setter | 0.035352 | True | Brittany_spaniel | 0.003879 | True |
1910 | 853299958564483072 | https://pbs.twimg.com/media/C9eHyF7XgAAOxPM.jpg | 1 | grille | 0.652280 | False | beach_wagon | 0.112846 | False | convertible | 0.086252 | False |
84 | 667502640335572993 | https://pbs.twimg.com/media/CUNyHTMUYAAQVch.jpg | 1 | Labrador_retriever | 0.996709 | True | golden_retriever | 0.001688 | True | beagle | 0.000712 | True |
988 | 707776935007539200 | https://pbs.twimg.com/media/CdKHWimWoAABs08.jpg | 1 | miniature_pinscher | 0.890426 | True | toy_terrier | 0.051335 | True | Chihuahua | 0.018015 | True |
1624 | 803692223237865472 | https://pbs.twimg.com/media/CZhn-QAWwAASQan.jpg | 1 | Lakeland_terrier | 0.530104 | True | Irish_terrier | 0.197314 | True | Airedale | 0.082515 | True |
1287 | 751132876104687617 | https://pbs.twimg.com/media/CmyPXNOW8AEtaJ-.jpg | 1 | Labrador_retriever | 0.929390 | True | Chesapeake_Bay_retriever | 0.038254 | True | golden_retriever | 0.007610 | True |
578 | 678755239630127104 | https://pbs.twimg.com/media/CWtsSQAUkAAnWws.jpg | 1 | malamute | 0.606654 | True | Border_collie | 0.193831 | True | collie | 0.048378 | True |
1251 | 747933425676525569 | https://pbs.twimg.com/media/CmExV2qWkAAn_pN.jpg | 1 | Samoyed | 0.998201 | True | Eskimo_dog | 0.000793 | True | Great_Pyrenees | 0.000296 | True |
564 | 678255464182861824 | https://pbs.twimg.com/media/CWmlvxJU4AEAqaN.jpg | 1 | Chihuahua | 0.613819 | True | Yorkshire_terrier | 0.127931 | True | Pomeranian | 0.062124 | True |
1314 | 754856583969079297 | https://pbs.twimg.com/media/CnnKCKNWgAAcOB8.jpg | 2 | golden_retriever | 0.872385 | True | Labrador_retriever | 0.099963 | True | cocker_spaniel | 0.006051 | True |
141 | 668567822092664832 | https://pbs.twimg.com/media/CUc64knWoAkZt70.jpg | 1 | Shih-Tzu | 0.985649 | True | Lhasa | 0.007078 | True | Pekinese | 0.003053 | True |
2024 | 881666595344535552 | https://pbs.twimg.com/media/DDxPFwbWAAEbVVR.jpg | 1 | Saluki | 0.529012 | True | Afghan_hound | 0.250003 | True | golden_retriever | 0.160739 | True |
1383 | 765669560888528897 | https://pbs.twimg.com/media/CqA0XcYWAAAzltT.jpg | 1 | beagle | 0.993333 | True | Walker_hound | 0.002902 | True | basset | 0.002415 | True |
625 | 680798457301471234 | https://pbs.twimg.com/media/CXKuiyHUEAAMAGa.jpg | 1 | ram | 0.499761 | False | hog | 0.283795 | False | ox | 0.067455 | False |
1632 | 805826884734976000 | https://pbs.twimg.com/ext_tw_video_thumb/80582... | 1 | Siberian_husky | 0.248926 | True | American_Staffordshire_terrier | 0.098313 | True | Eskimo_dog | 0.080188 | True |
859 | 696894894812565505 | https://pbs.twimg.com/media/CaveNQcVIAECyBr.jpg | 1 | Appenzeller | 0.665628 | True | beagle | 0.104795 | True | Greater_Swiss_Mountain_dog | 0.067868 | True |
419 | 674038233588723717 | https://pbs.twimg.com/media/CVqqMtiVEAEye_L.jpg | 1 | Eskimo_dog | 0.358459 | True | Norwegian_elkhound | 0.206963 | True | malamute | 0.148236 | True |
39 | 666649482315059201 | https://pbs.twimg.com/media/CUBqKnLWwAA5OQB.jpg | 1 | Border_collie | 0.447803 | True | English_springer | 0.170497 | True | collie | 0.139206 | True |
497 | 675798442703122432 | https://pbs.twimg.com/media/CWDrGH4UYAARoq_.jpg | 1 | beagle | 0.681218 | True | basset | 0.125121 | True | boxer | 0.080398 | True |
1499 | 783821107061198850 | https://pbs.twimg.com/media/CuCxIzyWEAQTnQA.jpg | 1 | Lakeland_terrier | 0.265659 | True | golden_retriever | 0.196414 | True | standard_poodle | 0.133534 | True |
320 | 671789708968640512 | https://pbs.twimg.com/tweet_video_thumb/CVKtH-... | 1 | dalmatian | 0.114259 | True | teddy | 0.062275 | False | steam_locomotive | 0.049700 | False |
1710 | 818259473185828864 | https://pbs.twimg.com/media/C1sKo_QUkAALtkw.jpg | 1 | miniature_schnauzer | 0.367368 | True | toy_poodle | 0.112479 | True | standard_schnauzer | 0.095434 | True |
226 | 670361874861563904 | https://pbs.twimg.com/media/CU2akCQWsAIbaOV.jpg | 1 | platypus | 0.974075 | False | spotted_salamander | 0.011068 | False | bison | 0.003897 | False |
77 | 667437278097252352 | https://pbs.twimg.com/media/CUM2qWaWoAUZ06L.jpg | 1 | porcupine | 0.989154 | False | bath_towel | 0.006300 | False | badger | 0.000966 | False |
668 | 683030066213818368 | https://pbs.twimg.com/media/CXqcOHCUQAAugTB.jpg | 1 | boxer | 0.722218 | True | bull_mastiff | 0.193804 | True | French_bulldog | 0.055194 | True |
983 | 707411934438625280 | https://pbs.twimg.com/media/CdE7ZktXIAEiWLj.jpg | 1 | Lakeland_terrier | 0.738277 | True | Airedale | 0.028515 | True | giant_schnauzer | 0.024876 | True |
1846 | 839239871831150596 | https://pbs.twimg.com/media/C6WUNadWYAAPxHv.jpg | 3 | Leonberg | 0.927021 | True | Newfoundland | 0.050009 | True | Saint_Bernard | 0.010728 | True |
562 | 677918531514703872 | https://pbs.twimg.com/media/CWhzTbzWUAAEAUN.jpg | 1 | Eskimo_dog | 0.199347 | True | dalmatian | 0.153225 | True | American_Staffordshire_terrier | 0.107798 | True |
505 | 675891555769696257 | https://pbs.twimg.com/media/CWE_x33UwAEE3no.jpg | 1 | Italian_greyhound | 0.305637 | True | whippet | 0.232057 | True | Great_Dane | 0.117806 | True |
1913 | 854010172552949760 | https://pbs.twimg.com/media/C9oNt91WAAAFSLS.jpg | 1 | English_springer | 0.354733 | True | collie | 0.177538 | True | Border_collie | 0.131706 | True |
606 | 679877062409191424 | https://pbs.twimg.com/media/CW9olDsUsAA0XSf.jpg | 1 | hog | 0.809466 | False | hay | 0.060178 | False | lumbermill | 0.016483 | False |
192 | 669567591774625800 | https://pbs.twimg.com/media/CUrIK1DWoAAhECq.jpg | 1 | Chihuahua | 0.980511 | True | toy_terrier | 0.009166 | True | miniature_pinscher | 0.002659 | True |
1606 | 800459316964663297 | https://pbs.twimg.com/media/CxvNfrhWQAA2hKM.jpg | 1 | teddy | 0.311928 | False | ice_bear | 0.184657 | False | Christmas_stocking | 0.173229 | False |
1919 | 855851453814013952 | https://pbs.twimg.com/media/C-CYWrvWAAU8AXH.jpg | 1 | flat-coated_retriever | 0.321676 | True | Labrador_retriever | 0.115138 | True | groenendael | 0.096100 | True |
72 | 667211855547486208 | https://pbs.twimg.com/media/CUJppKJWoAA75NP.jpg | 1 | golden_retriever | 0.462556 | True | Labrador_retriever | 0.454937 | True | kuvasz | 0.024193 | True |
list(img.columns.values)
['tweet_id', 'jpg_url', 'img_num', 'p1', 'p1_conf', 'p1_dog', 'p2', 'p2_conf', 'p2_dog', 'p3', 'p3_conf', 'p3_dog']
api.head(30)
Unnamed: 0 | tweet_id | full_text | fav_count | retweet_count | name | dog | numerator | denominator | |
---|---|---|---|---|---|---|---|---|---|
0 | 0 | 892420643555336193 | This is Phineas. He's a mystical boy. Only eve... | 38625 | 8541 | Phineas | NaN | 13 | 10 |
1 | 1 | 892177421306343426 | This is Tilly. She's just checking pup on you.... | 33105 | 6282 | Tilly | NaN | 13 | 10 |
2 | 2 | 891815181378084864 | This is Archie. He is a rare Norwegian Pouncin... | 24922 | 4161 | Archie | NaN | 12 | 10 |
3 | 3 | 891689557279858688 | This is Darla. She commenced a snooze mid meal... | 42022 | 8670 | Darla | NaN | 13 | 10 |
4 | 4 | 891327558926688256 | This is Franklin. He would like you to stop ca... | 40172 | 9422 | Franklin | NaN | 12 | 10 |
5 | 5 | 891087950875897856 | Here we have a majestic great white breaching ... | 20140 | 3118 | coast | NaN | 13 | 10 |
6 | 6 | 890971913173991426 | Meet Jax. He enjoys ice cream so much he gets ... | 11807 | 2076 | Jax | NaN | 13 | 10 |
7 | 7 | 890729181411237888 | When you watch your owner call another dog a g... | 65260 | 18929 | boy | NaN | 13 | 10 |
8 | 8 | 890609185150312448 | This is Zoey. She doesn't want to be one of th... | 27682 | 4275 | Zoey | NaN | 13 | 10 |
9 | 9 | 890240255349198849 | This is Cassie. She is a college pup. Studying... | 31823 | 7434 | Cassie | doggo | 14 | 10 |
10 | 10 | 890006608113172480 | This is Koda. He is a South Australian decksha... | 30563 | 7353 | Koda | NaN | 13 | 10 |
11 | 11 | 889880896479866881 | This is Bruno. He is a service shark. Only get... | 27679 | 4980 | Bruno | NaN | 13 | 10 |
12 | 12 | 889665388333682689 | Here's a puppo that seems to be on the fence a... | 47935 | 10083 | her | puppo | 13 | 10 |
13 | 13 | 889638837579907072 | This is Ted. He does his best. Sometimes that'... | 27070 | 4551 | Ted | NaN | 12 | 10 |
14 | 14 | 889531135344209921 | This is Stuart. He's sporting his favorite fan... | 15040 | 2241 | Stuart | puppo | 13 | 10 |
15 | 15 | 889278841981685760 | This is Oliver. You're witnessing one of his m... | 25190 | 5430 | Oliver | NaN | 13 | 10 |
16 | 16 | 888917238123831296 | This is Jim. He found a fren. Taught him how t... | 28972 | 4508 | Jim | NaN | 12 | 10 |
17 | 17 | 888804989199671297 | This is Zeke. He has a new stick. Very proud o... | 25482 | 4353 | Zeke | NaN | 13 | 10 |
18 | 18 | 888554962724278272 | This is Ralphus. He's powering up. Attempting ... | 19824 | 3588 | Ralphus | NaN | 13 | 10 |
19 | 19 | 888078434458587136 | This is Gerald. He was just told he didn't get... | 21677 | 3500 | Gerald | NaN | 12 | 10 |
20 | 20 | 887705289381826560 | This is Jeffrey. He has a monopoly on the pool... | 30067 | 5405 | Jeffrey | NaN | 13 | 10 |
21 | 21 | 887517139158093824 | I've yet to rate a Venezuelan Hover Wiener. Th... | 46065 | 11693 | Wiener | NaN | 14 | 10 |
22 | 22 | 887473957103951883 | This is Canela. She attempted some fancy porch... | 68851 | 18259 | Canela | NaN | 13 | 10 |
23 | 23 | 887343217045368832 | You may not have known you needed to see this ... | 33548 | 10422 | today | NaN | 13 | 10 |
24 | 24 | 887101392804085760 | This... is a Jubilant Antarctic House Bear. We... | 30425 | 5975 | This | NaN | 12 | 10 |
25 | 25 | 886983233522544640 | This is Maya. She's very shy. Rarely leaves he... | 35026 | 7791 | Maya | NaN | 13 | 10 |
26 | 26 | 886736880519319552 | This is Mingus. He's a wonderful father to his... | 12015 | 3302 | Mingus | NaN | 13 | 10 |
27 | 27 | 886680336477933568 | This is Derek. He's late for a dog meeting. 13... | 22325 | 4477 | Derek | NaN | 13 | 10 |
28 | 28 | 886366144734445568 | This is Roscoe. Another pupper fallen victim t... | 21112 | 3203 | Roscoe | pupper | 12 | 10 |
29 | 29 | 886267009285017600 | @NonWhiteHat @MayhewMayhem omg hello tanner yo... | 116 | 4 | caution | NaN | 12 | 10 |
#Listing All Columns Before dropping
list(api.columns.values)
['Unnamed: 0', 'tweet_id', 'full_text', 'fav_count', 'retweet_count', 'name', 'dog', 'numerator', 'denominator']
# Remove the leading 'Unnamed: 0' column in place; it holds nothing but row numbers
del api['Unnamed: 0']
# List the column names that remain after the drop
api.columns.tolist()
['tweet_id', 'full_text', 'fav_count', 'retweet_count', 'name', 'dog', 'numerator', 'denominator']
api.head(20)
tweet_id | full_text | fav_count | retweet_count | name | dog | numerator | denominator | |
---|---|---|---|---|---|---|---|---|
0 | 892420643555336193 | This is Phineas. He's a mystical boy. Only eve... | 38625 | 8541 | Phineas | NaN | 13 | 10 |
1 | 892177421306343426 | This is Tilly. She's just checking pup on you.... | 33105 | 6282 | Tilly | NaN | 13 | 10 |
2 | 891815181378084864 | This is Archie. He is a rare Norwegian Pouncin... | 24922 | 4161 | Archie | NaN | 12 | 10 |
3 | 891689557279858688 | This is Darla. She commenced a snooze mid meal... | 42022 | 8670 | Darla | NaN | 13 | 10 |
4 | 891327558926688256 | This is Franklin. He would like you to stop ca... | 40172 | 9422 | Franklin | NaN | 12 | 10 |
5 | 891087950875897856 | Here we have a majestic great white breaching ... | 20140 | 3118 | coast | NaN | 13 | 10 |
6 | 890971913173991426 | Meet Jax. He enjoys ice cream so much he gets ... | 11807 | 2076 | Jax | NaN | 13 | 10 |
7 | 890729181411237888 | When you watch your owner call another dog a g... | 65260 | 18929 | boy | NaN | 13 | 10 |
8 | 890609185150312448 | This is Zoey. She doesn't want to be one of th... | 27682 | 4275 | Zoey | NaN | 13 | 10 |
9 | 890240255349198849 | This is Cassie. She is a college pup. Studying... | 31823 | 7434 | Cassie | doggo | 14 | 10 |
10 | 890006608113172480 | This is Koda. He is a South Australian decksha... | 30563 | 7353 | Koda | NaN | 13 | 10 |
11 | 889880896479866881 | This is Bruno. He is a service shark. Only get... | 27679 | 4980 | Bruno | NaN | 13 | 10 |
12 | 889665388333682689 | Here's a puppo that seems to be on the fence a... | 47935 | 10083 | her | puppo | 13 | 10 |
13 | 889638837579907072 | This is Ted. He does his best. Sometimes that'... | 27070 | 4551 | Ted | NaN | 12 | 10 |
14 | 889531135344209921 | This is Stuart. He's sporting his favorite fan... | 15040 | 2241 | Stuart | puppo | 13 | 10 |
15 | 889278841981685760 | This is Oliver. You're witnessing one of his m... | 25190 | 5430 | Oliver | NaN | 13 | 10 |
16 | 888917238123831296 | This is Jim. He found a fren. Taught him how t... | 28972 | 4508 | Jim | NaN | 12 | 10 |
17 | 888804989199671297 | This is Zeke. He has a new stick. Very proud o... | 25482 | 4353 | Zeke | NaN | 13 | 10 |
18 | 888554962724278272 | This is Ralphus. He's powering up. Attempting ... | 19824 | 3588 | Ralphus | NaN | 13 | 10 |
19 | 888078434458587136 | This is Gerald. He was just told he didn't get... | 21677 | 3500 | Gerald | NaN | 12 | 10 |
Programmatic Assessment
archive.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2356 entries, 0 to 2355 Data columns (total 17 columns): tweet_id 2356 non-null int64 in_reply_to_status_id 78 non-null float64 in_reply_to_user_id 78 non-null float64 timestamp 2356 non-null object source 2356 non-null object text 2356 non-null object retweeted_status_id 181 non-null float64 retweeted_status_user_id 181 non-null float64 retweeted_status_timestamp 181 non-null object expanded_urls 2297 non-null object rating_numerator 2356 non-null int64 rating_denominator 2356 non-null int64 name 2356 non-null object doggo 2356 non-null object floofer 2356 non-null object pupper 2356 non-null object puppo 2356 non-null object dtypes: float64(4), int64(3), object(10) memory usage: 313.0+ KB
archive.describe()
tweet_id | in_reply_to_status_id | in_reply_to_user_id | retweeted_status_id | retweeted_status_user_id | rating_numerator | rating_denominator | |
---|---|---|---|---|---|---|---|
count | 2.356000e+03 | 7.800000e+01 | 7.800000e+01 | 1.810000e+02 | 1.810000e+02 | 2356.000000 | 2356.000000 |
mean | 7.427716e+17 | 7.455079e+17 | 2.014171e+16 | 7.720400e+17 | 1.241698e+16 | 13.126486 | 10.455433 |
std | 6.856705e+16 | 7.582492e+16 | 1.252797e+17 | 6.236928e+16 | 9.599254e+16 | 45.876648 | 6.745237 |
min | 6.660209e+17 | 6.658147e+17 | 1.185634e+07 | 6.661041e+17 | 7.832140e+05 | 0.000000 | 0.000000 |
25% | 6.783989e+17 | 6.757419e+17 | 3.086374e+08 | 7.186315e+17 | 4.196984e+09 | 10.000000 | 10.000000 |
50% | 7.196279e+17 | 7.038708e+17 | 4.196984e+09 | 7.804657e+17 | 4.196984e+09 | 11.000000 | 10.000000 |
75% | 7.993373e+17 | 8.257804e+17 | 4.196984e+09 | 8.203146e+17 | 4.196984e+09 | 12.000000 | 10.000000 |
max | 8.924206e+17 | 8.862664e+17 | 8.405479e+17 | 8.874740e+17 | 7.874618e+17 | 1776.000000 | 170.000000 |
api.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2344 entries, 0 to 2343 Data columns (total 4 columns): Tid 2344 non-null int64 fav_count 2344 non-null int64 full_text 2344 non-null object retweet_count 2344 non-null int64 dtypes: int64(3), object(1) memory usage: 73.3+ KB
img.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2075 entries, 0 to 2074 Data columns (total 12 columns): tweet_id 2075 non-null int64 jpg_url 2075 non-null object img_num 2075 non-null int64 p1 2075 non-null object p1_conf 2075 non-null float64 p1_dog 2075 non-null bool p2 2075 non-null object p2_conf 2075 non-null float64 p2_dog 2075 non-null bool p3 2075 non-null object p3_conf 2075 non-null float64 p3_dog 2075 non-null bool dtypes: bool(3), float64(3), int64(2), object(4) memory usage: 152.1+ KB
api.head()
Tid | fav_count | full_text | retweet_count | |
---|---|---|---|---|
0 | 892420643555336193 | 38625 | This is Phineas. He's a mystical boy. Only eve... | 8541 |
1 | 892177421306343426 | 33105 | This is Tilly. She's just checking pup on you.... | 6282 |
2 | 891815181378084864 | 24922 | This is Archie. He is a rare Norwegian Pouncin... | 4161 |
3 | 891689557279858688 | 42022 | This is Darla. She commenced a snooze mid meal... | 8670 |
4 | 891327558926688256 | 40172 | This is Franklin. He would like you to stop ca... | 9422 |
# Select the rows of `archive` whose retweeted_status_id is populated.
# A DataFrame is filtered by indexing it with a boolean mask in square
# brackets; calling it with parentheses raises
# "TypeError: 'DataFrame' object is not callable".
# The mask is built with notna() because a `!= None` comparison is True for
# every row, including the NaN ones, and therefore filters nothing.
# NOTE(review): the copied `patients(patients['city'] == 'New York')` line was
# removed; no `patients` frame is loaded in the visible part of this notebook.
archive[archive['retweeted_status_id'].notna()]
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-22-d1dcf400d198> in <module>() 1 # archive.subset(retweeted_status_id != 'NaN') ----> 2 archive(archive['retweeted_status_id'] != None) 3 patients(patients['city'] == 'New York') TypeError: 'DataFrame' object is not callable
# Check for column names that appear more than once across the three tables:
# gather every column label from archive, api and img into one Series, then
# keep only the entries flagged as repeats of an earlier label.
all_colums = pd.Series([*archive, *api, *img])
all_colums.loc[all_colums.duplicated()]
21 tweet_id dtype: object
Detect and document at least eight (8) quality issues and two (2) tidiness issues in your wrangle_act.ipynb Jupyter Notebook.
# Count archive rows whose tweet_id repeats an earlier row's tweet_id
archive['tweet_id'].duplicated().sum()
0
# Count image-prediction rows whose tweet_id repeats an earlier row's tweet_id
img['tweet_id'].duplicated().sum()
0
archive.