From 2b2861ffd32f7a62b9bc7e7618cff64fb8fdc98d Mon Sep 17 00:00:00 2001 From: surya Date: Sun, 21 May 2023 02:10:06 +0530 Subject: [PATCH] Initial commit --- .gitignore | 163 ++++++++++++++++++++++++++++++++++++++++++++++++ Makefile | 25 ++++++++ README.md | 13 ++++ parse.py | 55 ++++++++++++++++ requirements.in | 1 + 5 files changed, 257 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100644 parse.py create mode 100644 requirements.in diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d9f7ebd --- /dev/null +++ b/.gitignore @@ -0,0 +1,163 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+
+backup
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..25194e6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,42 @@
+# Build the IPL fixture dataset: fetch the schedule PDF and convert it to JSON.
+
+# `2023` and `clean` name actions, not files. Without this, the `2023/`
+# directory created by the download step can make `make 2023` report
+# "up to date" and skip the parse step.
+.PHONY: 2023 clean
+
+# Delete a target whose recipe failed, so an interrupted download or
+# pip-compile run is retried instead of leaving a truncated file behind.
+.DELETE_ON_ERROR:
+
+# Interpreter used to create the virtualenv; override with `make PYTHON=...`.
+PYTHON ?= python3
+
+2023: .ipl-data/.c-install 2023/ipl-data-2023.pdf
+	.ipl-data/bin/python parse.py 2023/ipl-data-2023.pdf 2023/ipl-data-2023.json
+
+# --fail makes an HTTP error abort the build instead of saving the error page
+# as the PDF.
+2023/ipl-data-2023.pdf:
+	mkdir -p 2023
+	curl --fail --location --output $@ "https://documents.bcci.tv/bcci/documents/1676632383158_TATA%20IPL%202023%20-%20Match%20Schedule.pdf"
+
+.ipl-data/bin/activate:
+	$(PYTHON) -m venv .ipl-data
+
+.ipl-data/bin/pip-compile: .ipl-data/bin/activate
+	.ipl-data/bin/python -m pip install pip-tools
+
+requirements.txt: requirements.in .ipl-data/bin/pip-compile
+	.ipl-data/bin/pip-compile --output-file=- > requirements.txt
+
+# Stamp file recording that the pinned requirements are installed in the venv.
+.ipl-data/.c-install: requirements.txt
+	.ipl-data/bin/pip install -r requirements.txt
+	touch .ipl-data/.c-install
+
+clean:
+	rm -rf .ipl-data
+	rm -rf __pycache__
+	rm -rf requirements.txt
+	rm -rf 2023/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..19d2a87
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# IPL Fixtures
dataset
+
+## Requirements
+
+- Python 3
+- Some version of Linux
+
+## Run
+
+To get the fixture data for 2023, run
+`make 2023`
+
+Output is present in `2023/ipl-data-2023.json`
diff --git a/parse.py b/parse.py
new file mode 100644
index 0000000..12b3f7f
--- /dev/null
+++ b/parse.py
@@ -0,0 +1,124 @@
+"""Convert the IPL fixture schedule PDF into a JSON list of matches.
+
+Usage: python parse.py <input.pdf> <output.json>
+"""
+import json
+import sys
+from dataclasses import asdict, dataclass
+
+import tabula
+
+# Leading rows of each extracted table that are not fixtures; presumably the
+# rest of the PDF's multi-line column heading (confirm against the PDF).
+HEADER_ROWS = 2
+
+# Match field -> column label tabula assigns in the extracted table.
+COLUMNS = {
+    "day_n": "MATCH",
+    "match_n": "MATCH.1",
+    "day": "Unnamed: 0",
+    "date": "Unnamed: 1",
+    "time": "Unnamed: 2",
+    "venue": "Unnamed: 5",
+}
+
+# Column whose cell holds both sides of the fixture: home first, away last.
+FIXTURE_COLUMN = "Unnamed: 3"
+
+# Names recognised at the start or end of a fixture cell. The play-off
+# placeholders are included; a cell that is exactly one of them yields that
+# text as both home and away.
+TEAMS = (
+    "Sunrisers Hyderabad",
+    "Gujarat Titans",
+    "Mumbai Indians",
+    "Chennai Super Kings",
+    "Lucknow Super Giants",
+    "Delhi Capitals",
+    "Kolkata Knight Riders",
+    "Rajasthan Royals",
+    "Royal Challengers Bangalore",
+    "Punjab Kings",
+    "Qualifier 1 - Team 1 vs Team 2",
+    "Eliminator - Team 3 vs Team 4",
+    "Qualifier 2 - Winner of Eliminator vs Loser of Qualifier 1",
+    "TATA IPL 2023 Final - Winner of Qualifier 1 vs. Winner of Qualifier 2",
+)
+
+
+@dataclass
+class Match:
+    """One fixture row; fields are serialised to JSON in this order.
+
+    Values are stored as extracted from the PDF. The annotations describe the
+    intended content and are not enforced.
+    """
+
+    day_n: int = 0
+    match_n: int = 0
+    day: str = ""
+    date: str = ""
+    time: str = ""
+    home: str = ""
+    away: str = ""
+    venue: str = ""
+
+
+def split_fixture(fixture, teams=TEAMS):
+    """Return ``(home, away)`` for a fixture cell.
+
+    ``home`` is the entry of *teams* the cell starts with and ``away`` the one
+    it ends with; either is ``""`` when nothing matches. A cell that is not a
+    string (e.g. NaN for an empty cell) gives ``("", "")``.
+    """
+    if not isinstance(fixture, str):
+        return "", ""
+    text = fixture.strip()
+    home = away = ""
+    for team in teams:
+        if text.startswith(team):
+            home = team
+        if text.endswith(team):
+            away = team
+    return home, away
+
+
+def parse_tables(dataframes, teams=TEAMS):
+    """Build a Match for every fixture row of the extracted tables.
+
+    Args:
+        dataframes: DataFrames as returned by ``tabula.read_pdf``.
+        teams: Names passed to :func:`split_fixture`.
+
+    Raises:
+        KeyError: If a table with fixture rows lacks an expected column.
+    """
+    matches = []
+    # NOTE: *teams* is only read. Growing it per row (e.g. with an empty
+    # home name) would make "" match, and blank out, every later fixture.
+    for dataframe in dataframes:
+        # to_dict("records") yields one {column: value} dict per row.
+        for row in dataframe.iloc[HEADER_ROWS:].to_dict("records"):
+            home, away = split_fixture(row[FIXTURE_COLUMN], teams)
+            fields = {name: row[label] for name, label in COLUMNS.items()}
+            matches.append(Match(home=home, away=away, **fields))
+    return matches
+
+
+def main():
+    """Read the PDF named in argv[1] and write the fixtures as JSON to argv[2]."""
+    if len(sys.argv) < 3:
+        sys.exit("usage: parse.py <input.pdf> <output.json>")
+    input_file, output_file = sys.argv[1], sys.argv[2]
+
+    print("Reading from " + input_file)
+    matches = parse_tables(tabula.read_pdf(input_file, pages=[1, 2]))
+
+    print("Writing to " + output_file)
+    # The context manager closes the file even if serialisation fails.
+    with open(output_file, "w") as f:
+        json.dump([asdict(m) for m in matches], f, indent=4)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.in b/requirements.in
new file mode 100644
index 0000000..6111c99
--- /dev/null
+++ b/requirements.in
@@ -0,0 +1 @@
+tabula-py