# parse snp500 data and output [date, price, price+dividends]
#
# expected format from stdin: CSV
# Date,SP500,Dividend,Earnings,Consumer Price Index,Long Interest Rate,Real Price,Real Dividend,Real Earnings,PE10
#
# commands: see usage()
#
# example:
#   # show dates in which investing for 25 years would give you less than 6% annualized returns
#   cat snp500.csv  | python3 process_snp500.py intervals_dividend_loss 25 0.06


from datetime import datetime, date, time, timedelta
import sys
import csv

def load_array(fobj):
    """Parse S&P 500 CSV text into a list of per-row dicts.

    Each data line is expected to look like
    ``YYYY-MM-DD,<price>,<dividend>,...``; only the first three columns are
    used.  The following lines are skipped instead of being parsed:

    * blank lines (including the empty piece left by a trailing newline),
    * lines starting with ``#``,
    * a header line whose first column is ``Date``.

    Args:
        fobj: Text file-like object providing ``read()``.

    Returns:
        A list of dicts, in input order, with keys ``'Date'``
        (``datetime.date``), ``'Price'`` and ``'Dividend'`` (floats; an empty
        field is stored as ``0.0``).

    Raises:
        ValueError: If a date or a number cannot be parsed.
        IndexError: If a data line has fewer than three columns.
    """
    sp500 = []
    for line in fobj.read().split('\n'):
        line = line.strip()
        # Blank lines and comments carry no data.
        if not line or line.startswith('#'):
            continue
        fields = line.split(',')
        # The column-title row cannot be parsed as a date; skip it.
        if fields[0].strip().lower() == 'date':
            continue
        day = datetime.strptime(fields[0], r"%Y-%m-%d").date()
        # Empty numeric cells are treated as zero.
        values = [float(v) if v else 0.0 for v in fields[1:3]]
        sp500.append({
            'Date': day,
            'Price': values[0],
            'Dividend': values[1],
        })
    return sp500

def get_price_with_dividend(min_date=None, tax=0.0):
    """Build the (date, price, price + accumulated dividends) series.

    Walks the module-level ``sp500_ar`` rows in order.  For every kept row one
    twelfth of its ``'Dividend'`` value, reduced by ``tax``, is added to a
    running total; the total added to a row's price only contains the
    dividends of the kept rows *before* it.

    Args:
        min_date: When not ``None``, rows dated earlier than this are ignored
            entirely (they do not contribute dividends either).
        tax: Fraction of each dividend that is withheld.

    Returns:
        A list of ``(date, price, price_plus_dividends)`` tuples, the last
        element rounded to two decimals.
    """
    net_fraction = 1 - tax
    series = []
    accumulated = 0
    for entry in sp500_ar:
        when = entry['Date']
        if min_date is None or when >= min_date:
            price = entry['Price']
            # Record first, then accumulate: the current row's dividend
            # only counts from the next row onwards.
            series.append((when, price, round(price + accumulated, 2)))
            accumulated += entry['Dividend'] / 12 * net_fraction
    return series


def process_price_with_dividends():
    """Print the price and price+dividends series from 1928-01-01 onwards.

    Output is one bracketed, comma-terminated row per line, preceded by a
    header row with the column titles.  No tax is applied to dividends.
    """
    series = get_price_with_dividend(min_date=date(1928, 1, 1), tax=0.0)
    print('''["Date", "Price", "Price+dividends"],''')
    for day, price, with_dividends in series:
        stamp = day.strftime(r'%Y-%m-%d')
        print(f"""["{stamp}", {price}, {with_dividends}],""")


def get_losing_interval(param, interval_years=25, total_yield=1.0):
    """Return the start dates whose value after ``interval_years`` missed the target.

    The series from 1928-01-01 onwards (no dividend tax) is scanned.  A start
    date is reported when a sample exists for the same month and day
    ``interval_years`` later and that later value is below
    ``start_value * total_yield``.  Start dates without a matching later
    sample are ignored.

    Args:
        param: ``'price'`` compares plain prices; any other value compares
            price plus accumulated dividends.
        interval_years: Whole number of years between start and end sample.
        total_yield: Required growth factor over the whole interval.

    Returns:
        A list of ``datetime.date`` start dates, in series order.
    """
    ppd = get_price_with_dividend(min_date=date(1928, 1, 1), tax=0.0)
    # Tuple index of the compared value: 1 = price, 2 = price + dividends.
    column = 1 if param == 'price' else 2
    value_by_date = {row[0]: row[column] for row in ppd}
    loss_dates = []
    for row in ppd:
        start_date = row[0]
        try:
            end_date = start_date.replace(year=start_date.year + interval_years)
        except ValueError:
            # The shifted date does not exist (Feb 29 in a non-leap year, or
            # a year outside the supported range), so there can be no sample
            # for it; treat it like any other start date without an end.
            continue
        if end_date in value_by_date:
            if value_by_date[end_date] < row[column] * total_yield:
                loss_dates.append(start_date)
    return loss_dates

def process_intervals_loss(param, interval_years=25, yearly_yield=0.0):
    """Print the start dates that missed an annualized yield over an interval.

    The yearly yield is compounded over ``interval_years`` to obtain the
    required total growth factor.  The first output line gives the number of
    losing start dates and their share of all samples from 1928-01-01
    onwards; each losing start date follows on its own line.

    Args:
        param: ``'price'`` to compare plain prices; any other value compares
            price plus accumulated dividends.
        interval_years: Length of the investment interval in years.
        yearly_yield: Required annualized yield (e.g. ``0.06`` for 6%).
    """
    total_yield = (1 + yearly_yield) ** interval_years
    losing_pts = get_losing_interval(param, interval_years, total_yield)
    ppd = get_price_with_dividend(min_date=date(1928, 1, 1), tax=0.0)
    total = len(ppd)
    # With no samples at all there is nothing to divide by; report 0.0%.
    percent = round((100*len(losing_pts))/total, 2) if total else 0.0
    print(f"{len(losing_pts)} found ({percent}%):")
    for d in losing_pts:
        print(d)


def usage(msg=""):
    """Print the command-line help, preceded by ``msg`` when it is non-empty.

    Args:
        msg: Optional message (typically an error) shown before the help.
    """
    if msg:
        print(msg)
    help_lines = (
        f"Usage: {sys.argv[0]} <command>",
        "Command in: {price_with_dividends, intervals_price_loss, intervals_dividend_loss}",
        "<intervals_price_loss> [years=30] [yearly_yield=0] -- find dates where price_end < price_start+years with yearly_yield",
        "<intervals_dividend_loss> [years=30] [yearly_yield=0] -- find dates where price+dividend_end < price+dividend_start+years with yearly_yield",
    )
    for text in help_lines:
        print(text)


# Command-line entry point.
#
# The command line is validated *before* stdin is consumed, so a bad
# invocation reports usage immediately instead of first waiting for input.
# Every invalid invocation exits with status 1.

if len(sys.argv) < 2:
    usage("Invalid parameters.")
    sys.exit(1)

cmd = sys.argv[1]

# Interval commands mapped to the series they analyse.
interval_params = {
    'intervals_price_loss': 'price',
    'intervals_dividend_loss': 'dividend',
}

if cmd != 'price_with_dividends' and cmd not in interval_params:
    usage(f"Unrecognized command '{cmd}'")
    sys.exit(1)

if cmd in interval_params:
    try:
        interval_yr = int(sys.argv[2]) if len(sys.argv) > 2 else 30
        yearly_yield = float(sys.argv[3]) if len(sys.argv) > 3 else 0
    except ValueError:
        # Non-numeric years / yield: show the help rather than a traceback.
        usage("Invalid parameters.")
        sys.exit(1)

# Module-level global read by get_price_with_dividend().
sp500_ar = load_array(sys.stdin)

if cmd == 'price_with_dividends':
    process_price_with_dividends()
else:
    process_intervals_loss(interval_params[cmd], interval_yr, yearly_yield)