#!/usr/bin/env python3
# coding: utf-8

# Hands-on 3: "Analyzing Boa Outputs with Pandas"
# TODO you need to implement the missing analysis parts below

#%%
from common.df import get_df
import pandas as pd

if __name__ == '__main__':
    # Script entry point: the statements below only run when this file is
    # executed directly, not when it is imported.

    # TODO 1: get the counts of all studied projects and total number of Java files
    # NOTE: total_files and total_projs are the denominators update_pct() uses
    # below for the 'FilesPotential' and 'ProjectsPotential' rows, so both must
    # be numeric (and non-zero) before df.apply(update_pct, ...) is reached.
    # Presumably dfCounts is loaded with get_df (imported above) -- confirm its
    # signature in common/df.py.
    dfCounts = None
    total_files = None
    total_projs = None

    # TODO 2: load the Task 3 data
    # NOTE: the code below reads the columns 'var', 'kind' and 'count' from df.
    # While df is still None, the df.apply(...) call further down fails with
    # AttributeError, so this placeholder has to be filled in first.
    df = None
    # modify the data frame to compute percentages
    def update_pct(r):
        """Return a copy of row *r* with ``count`` formatted for display.

        Formatting rules, selected by ``r['var']``:

        * ``'FilesPotential'``    -> percentage of ``total_files``, e.g. ``"12.34%"``
        * ``'ProjectsPotential'`` -> percentage of ``total_projs``
        * anything else           -> abbreviated count: ``"1.23M"`` from one
          million up, ``"12K"`` (truncated) from one thousand up, otherwise
          the plain number as a string.

        ``total_files`` and ``total_projs`` are read from the enclosing scope.

        Args:
            r: a mapping-like row (e.g. a ``pandas.Series`` or ``dict``) with
               at least the keys ``'var'`` and ``'count'``.

        Returns:
            A new row of the same type as *r*; *r* itself is left untouched.
        """
        # Work on a copy: pandas does not support apply() callbacks that
        # mutate the object they are handed.
        row = r.copy()
        var = row['var']
        count = row['count']

        if var == 'FilesPotential':
            row['count'] = f"{count / total_files * 100:.2f}%"
        elif var == 'ProjectsPotential':
            row['count'] = f"{count / total_projs * 100:.2f}%"
        elif count >= 1000000:
            # inclusive bound so exactly 1,000,000 reads "1.00M", not "1000K"
            row['count'] = f"{count / 1000000:.2f}M"
        elif count >= 1000:
            # inclusive bound so exactly 1,000 reads "1K", not "1000"
            row['count'] = f"{count // 1000}K"
        else:
            row['count'] = str(count)
        return row
    # format every row's 'count' as a display string (percentage or K/M count)
    df2 = df.apply(update_pct, axis=1)

    # modify the data frame to match target table's layout:
    # one row per 'var', one column per 'kind'.
    # The aggregation is named by string on purpose: the cells are already
    # strings, and the builtin `sum` is only mapped to pandas' own "sum" through
    # a deprecated alias (FutureWarning since pandas 2.1). Called directly, the
    # builtin would evaluate 0 + "<str>" and raise TypeError.
    df3 = pd.pivot_table(df2, values='count', columns='kind', index='var', aggfunc='sum')

    # rearrange/rename the rows to match
    # (row labels lose their 'Potential' suffix: 'FilesPotential' -> 'Files')
    df4 = df3.rename(lambda n: n.replace('Potential', ''))
    df4 = df4.rename(columns={
        'BinaryLit': 'Binary Literals',
        'TryResources': 'Try with Resources',
        'Underscore': 'Underscore Literals',
    })

    # fix the row order; labels missing from the pivot become all-NaN rows
    df4 = df4.reindex(['Old', 'New', 'All', 'Files', 'Projects'])
    # move the 'Varargs' column to position 1 (second column)
    df4.insert(1, 'Varargs', df4.pop('Varargs'))

    # TODO 3: output the LaTeX table
    pass