#!/usr/bin/env python3
# coding: utf-8

# Hands-on 2: "Adding a Second Query"
# You should not need to modify the analysis code

#%%
from common.df import get_deduped_df
from common.graphs import save_plot, setup_plots
from common.tables import get_styler, highlight_cols, highlight_rows, save_table
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Script entry point: load the 'task2' query output, total the 'enums'
    # values per project, then write a summary table and two plots.

    # read the CSV into a DataFrame
    #
    # the CSV will look like:
    # "o","10278695","src/com/example/swen303/domainObjects/Colour.java","1"
    # "o","10401287","src/main/java/org/springframework/data/couchbase/core/WriteResultChecking.java","1"
    # "o","10401287","src/main/java/org/springframework/data/couchbase/core/convert/DateConverters.java","12"
    # ...
    # The four fields are named var, project, file and enums (in that order).
    # NOTE(review): drop=['var'] presumably discards the constant first
    # column -- confirm against common.df.get_deduped_df.
    df = get_deduped_df(
        'task2',
        'task2',
        drop=['var'],
        names=['var', 'project', 'file', 'enums'],
    )

    # Total the per-file counts for each project.
    # numeric_only=True keeps the string 'file' column out of the sum: since
    # pandas 2.0 the default is numeric_only=False, which would concatenate
    # every file path of a project into one huge string instead of dropping
    # the column, wasting time and memory for a value nothing below uses.
    df2 = df.groupby(['project']).sum(numeric_only=True)

    # build a summary table
    summary = df2.describe()
    style = highlight_rows(highlight_cols(get_styler(summary)))
    save_table(style, 'task2.tex')

    setup_plots()

    # plot the values for each project
    df2.plot()
    save_plot('task2-projects.png')

    # plot histogram, ignoring the long tail: the bin edges run from 1 to 20,
    # so larger per-project totals fall outside every bin
    df2.plot.hist(bins=list(range(1, 21)))
    save_plot('task2-hist.png')
