from datetime import datetime
import csv
import sys
import logging
import statistics
import json
import os

# Age at which people start to earn money, on average.
# It is added to a birth year to obtain the first simulated year of saving.
age_of_work = 21

# Average monthly saving per income class: {income_class: average_monthly_saving}.
# NOTE(review): the keys look like monthly income amounts -- confirm the units
# and currency against the data source.
# These values are for 2017, while our data is for 2021 -- but the inflation in that range is <0.3%
average_monthly_saving_by_income_class = {
    5478: 753,
    3441: 165,
    5332: 736,
    6846: 1020,
    11169: 2501,
}

def parse_date(datestr):
    """Convert a ``YYYY-MM`` string into a ``date`` object.

    Leading and trailing whitespace is ignored.  Because the format carries
    no day component, the returned date falls on the first day of the month.

    Raises:
        ValueError: If the stripped text does not match ``%Y-%m``.
    """
    cleaned = datestr.strip()
    parsed = datetime.strptime(cleaned, '%Y-%m')
    return parsed.date()

def load_data(raw_csv):
    """Parse raw CSV text into a list of ``[date, value]`` rows.

    The first column of each row is parsed with ``parse_date`` and the second
    with ``float``.  Rows that cannot be parsed are skipped: silently while no
    data row has been accepted yet (this is how header lines are dropped), and
    with a logged traceback afterwards.

    Args:
        raw_csv: Whole CSV document as a single string (comma separated,
            double-quote quoting).

    Returns:
        A list of ``[date, float]`` pairs in file order.
    """
    data = []
    csvr = csv.reader(raw_csv.strip().split('\n'), delimiter=',', quotechar='"')
    for nrow, row in enumerate(csvr):
        try:
            dt = parse_date(row[0])
            value = float(row[1])
        except (ValueError, IndexError):
            # ValueError: unparsable date or number; IndexError: row with fewer
            # than two columns.  Anything else (e.g. KeyboardInterrupt) must
            # propagate instead of being swallowed.
            # Errors before the first valid row are expected (headers), so
            # they are only reported once real data has started.
            if data:
                logging.exception(f"Parsing line {nrow} (will skip it): {row}")
            continue
        data.append([dt, value])
    return data

def compute_loss_since_year(data, startyr, endyr=None):
    """Return the relative change of the January value between two years.

    Only rows dated in January are used, giving one value per year (if a year
    has several January rows, the last one wins).

    Args:
        data: Iterable of ``(date, value)`` pairs; each date must expose
            ``.year`` and ``.month``.
        startyr: Reference year (int).
        endyr: Year to compare against.  Defaults to the latest year that has
            a January row in ``data``.

    Returns:
        ``(value[endyr] - value[startyr]) / value[startyr]``.  For example a
        value going from 20 to 100 yields 4.0, i.e. an increase of 400 %
        (the end value is 5 times the start value).

    Raises:
        KeyError: If ``startyr`` or ``endyr`` has no January row.
        ValueError: If ``endyr`` is omitted and ``data`` has no January rows.
        ZeroDivisionError: If the value for ``startyr`` is zero.
    """
    january_values = {dt.year: value for dt, value in data if dt.month == 1}
    if endyr is None:
        # The keys are already plain integer years, so the maximum key is the
        # latest year itself (no ``.year`` attribute to read).
        endyr = max(january_values)
    return (january_values[endyr] - january_values[startyr]) / january_values[startyr]

def compute_loss_over_range(data, birth_year, end_year=None):
    """Return the relative change up to ``end_year`` for each year in a range.

    For every year ``y`` in ``range(birth_year, end_year)`` the result holds
    ``(value[end_year] - value[y]) / value[y]``, where ``value`` is the
    January value of that year.  This is the same formula as
    ``compute_loss_since_year``.

    Args:
        data: Iterable of ``(date, value)`` pairs; each date must expose
            ``.year`` and ``.month``.
        birth_year: First year of the range (inclusive).
        end_year: Year every entry is compared against (exclusive bound of the
            range).  Defaults to the latest year that has a January row.

    Returns:
        A list of floats, one per year, in ascending year order.  The list is
        empty when ``birth_year >= end_year``.

    Raises:
        KeyError: If a year in the range, or ``end_year``, has no January row.
        ValueError: If ``end_year`` is omitted and ``data`` has no January rows.
    """
    # Build the January lookup table once and reuse it for every year instead
    # of rebuilding it per year.
    january_values = {dt.year: value for dt, value in data if dt.month == 1}
    if end_year is None:
        # Keys are plain integer years, so the maximum key is the year itself.
        end_year = max(january_values)
    return [
        (january_values[end_year] - january_values[year]) / january_values[year]
        for year in range(birth_year, end_year)
    ]

def simulate_loss_by_birth_year(data, simulation_start_year, simulation_end_year=None, monthly_average_savings=1289):
    """Simulate the loss on yearly savings for someone born in a given year.

    Saving starts ``age_of_work`` years after ``simulation_start_year``.  For
    each year from then on, ``compute_loss_over_range`` supplies a rate; one
    year of savings (12 monthly amounts) is divided by ``1 + rate`` and the
    result multiplied by ``rate`` is added to the running loss.

    Args:
        data: ``(date, value)`` rows, passed through to
            ``compute_loss_over_range``.
        simulation_start_year: Birth year of the simulated person.
        simulation_end_year: Last year of the simulation, passed through to
            ``compute_loss_over_range``.
        monthly_average_savings: Amount saved per month.

    Returns:
        A tuple ``(mean_rate, total_loss)``: the arithmetic mean of the yearly
        rates and the accumulated loss.
    """
    first_working_year = age_of_work + simulation_start_year
    yearly_rates = compute_loss_over_range(data, first_working_year, simulation_end_year)
    savings_per_year = 12 * monthly_average_savings

    accumulated_loss = 0.0
    for rate in yearly_rates:
        adjusted_savings = savings_per_year / (rate + 1)
        accumulated_loss += adjusted_savings * rate

    mean_rate = statistics.mean(yearly_rates)
    return mean_rate, accumulated_loss

def main():
    """Run the simulation for every income class and print one JSON list each.

    Command line: ``[birth_year [csv_file]]``.  Both arguments are optional;
    without a file name, the bundled CSV next to this script is used.

    For each entry of ``average_monthly_saving_by_income_class`` a JSON array
    is printed whose rows are ``[income_class, avg_monthly_savings,
    birth_year, mean_loss_percent, total_loss]`` for the birth years
    1935-1999, simulated up to 2021.
    """
    args = sys.argv[1:]
    # NOTE(review): the birth year argument is parsed (and validated as an
    # int) but the simulation below always covers the fixed 1935-1999 range,
    # so it currently has no effect on the output -- confirm the intent.
    birth_year = int(args[0]) if args else 1980
    if len(args) >= 2:
        fname = args[1]
    else:
        # A single argument used to crash with IndexError; fall back to the
        # default data file instead.
        fname = os.path.join(os.path.dirname(__file__), "snb-data-plkopr-en-selection-20210421_0900.csv")

    # Close the file deterministically instead of leaking the handle.
    with open(fname) as csv_file:
        data = load_data(csv_file.read())

    for income_class, avg_monthly_savings in average_monthly_saving_by_income_class.items():
        results = []
        # Separate loop variable so the command-line birth year is not shadowed.
        for simulated_birth_year in range(1935, 2000):
            pc_loss, value_loss = simulate_loss_by_birth_year(data, simulated_birth_year, 2021, avg_monthly_savings)
            results.append([income_class, avg_monthly_savings, simulated_birth_year, round(100*pc_loss, 2), int(value_loss)])
        print(json.dumps(results))

if __name__ == '__main__':
    main()