In [1]:
# import Statements

import numpy as np
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt

print("-------------------->>  Imports successful")
-------------------->>  Imports successful
In [2]:
#Setting up Path of the Database
path = "data/"
database = path + 'database.sqlite'

#Connection to Database
conn = sqlite3.connect(database)
print("-------------------->>Debug: Database connected")

#Command used to check tables in DB
tables = pd.read_sql("""SELECT * FROM sqlite_sequence""",conn)
tables
-------------------->>Debug: Database connected
Out[2]:
name seq
0 Team 103916
1 Country 51958
2 League 51958
3 Match 51958
4 Player 11075
5 Player_Attributes 183978
6 Team_Attributes 1458

Countries & Leauges

In [3]:
country = pd.read_sql("SELECT * from Country",conn)
country
Out[3]:
id name
0 1 Belgium
1 1729 England
2 4769 France
3 7809 Germany
4 10257 Italy
5 13274 Netherlands
6 15722 Poland
7 17642 Portugal
8 19694 Scotland
9 21518 Spain
10 24558 Switzerland
In [4]:
country.count()
Out[4]:
id      11
name    11
dtype: int64
In [5]:
#information about leauges
league = pd.read_sql("SELECT * from League",conn)
league
Out[5]:
id country_id name
0 1 1 Belgium Jupiler League
1 1729 1729 England Premier League
2 4769 4769 France Ligue 1
3 7809 7809 Germany 1. Bundesliga
4 10257 10257 Italy Serie A
5 13274 13274 Netherlands Eredivisie
6 15722 15722 Poland Ekstraklasa
7 17642 17642 Portugal Liga ZON Sagres
8 19694 19694 Scotland Premier League
9 21518 21518 Spain LIGA BBVA
10 24558 24558 Switzerland Super League
In [6]:
league.count()
Out[6]:
id            11
country_id    11
name          11
dtype: int64

Looking at the above two tables of coutriees and leauges.

Both have a count of 11. This means there 11 vertical's for country and legues we have to process.

In [7]:
#The variable name con_leauge is country + Leauge, need to clean this table Later)
con_league = pd.read_sql("""SELECT * from League 
                            JOIN
                            Country on Country.id = League.country_id;    """,conn)
con_league
Out[7]:
id country_id name id name
0 1 1 Belgium Jupiler League 1 Belgium
1 1729 1729 England Premier League 1729 England
2 4769 4769 France Ligue 1 4769 France
3 7809 7809 Germany 1. Bundesliga 7809 Germany
4 10257 10257 Italy Serie A 10257 Italy
5 13274 13274 Netherlands Eredivisie 13274 Netherlands
6 15722 15722 Poland Ekstraklasa 15722 Poland
7 17642 17642 Portugal Liga ZON Sagres 17642 Portugal
8 19694 19694 Scotland Premier League 19694 Scotland
9 21518 21518 Spain LIGA BBVA 21518 Spain
10 24558 24558 Switzerland Super League 24558 Switzerland

Checking Team Data

In [8]:
teams = pd.read_sql("SELECT * from Team ORDER BY id LIMIT 10" ,conn) 


teams
Out[8]:
id team_api_id team_fifa_api_id team_long_name team_short_name
0 1 9987 673.0 KRC Genk GEN
1 2 9993 675.0 Beerschot AC BAC
2 3 10000 15005.0 SV Zulte-Waregem ZUL
3 4 9994 2007.0 Sporting Lokeren LOK
4 5 9984 1750.0 KSV Cercle Brugge CEB
5 6 8635 229.0 RSC Anderlecht AND
6 7 9991 674.0 KAA Gent GEN
7 8 9998 1747.0 RAEC Mons MON
8 9 7947 NaN FCV Dender EH DEN
9 10 9985 232.0 Standard de Liège STL

List of Matches

In [9]:
matches = pd.read_sql("""SELECT * FROM  Match LIMIT 0""",conn)

matches
Out[9]:
id country_id league_id season stage date match_api_id home_team_api_id away_team_api_id home_team_goal ... SJA VCH VCD VCA GBH GBD GBA BSH BSD BSA

0 rows × 115 columns

In [10]:
matches = pd.read_sql("""
    SELECT Match.id,
    Match.country_id, 
    league_id ,
    season,
    stage,
    date,
    home_team_api_id as home_id,
    away_team_api_id as away_id,
    home_team_goal,
    away_team_goal
from Match
JOIN Country on Country.id = Match.country_id
JOIN League on League.id = Match.league_id
LEFT JOIN Team on Team.id = Match.home_team_api_id
WHERE Match.country_id = '21518'
LIMIT 10
"""
,conn)

# JOIN Team on Team.id = Match.away_team_api_id

matches
Out[10]:
id country_id league_id season stage date home_id away_id home_team_goal away_team_goal
0 21518 21518 21518 2008/2009 1 2008-08-30 00:00:00 10267 8661 3 0
1 21519 21518 21518 2008/2009 1 2008-08-31 00:00:00 8371 10205 1 1
2 21520 21518 21518 2008/2009 1 2008-08-31 00:00:00 9783 8633 2 1
3 21521 21518 21518 2008/2009 1 2008-08-31 00:00:00 8388 8634 1 0
4 21522 21518 21518 2008/2009 1 2008-08-31 00:00:00 8696 8302 1 1
5 21523 21518 21518 2008/2009 1 2008-08-31 00:00:00 9869 8305 1 2
6 21524 21518 21518 2008/2009 1 2008-08-31 00:00:00 8603 8479 0 1
7 21525 21518 21518 2008/2009 1 2008-08-30 00:00:00 8558 10281 1 0
8 21526 21518 21518 2008/2009 1 2008-08-31 00:00:00 8315 9865 1 3
9 21527 21518 21518 2008/2009 1 2008-08-31 00:00:00 9906 9864 4 0