import argparse
import sqlite3
import re
import pandas as pd
import numpy as np

# SQL query returning the highest runid stored in the RUN table
# (at most one row, with a single column)
lastID="""SELECT runid FROM RUN ORDER BY runid DESC LIMIT 1
"""

def getLastRunID():
    """Return the highest runid stored in the RUN table, as an int.

    The query is run on the module-level connection ``c``.

    Raises:
        RuntimeError: if the RUN table contains no row.
    """
    row = c.execute(lastID).fetchone()
    # fetchone() returns None when the query produced no row: report it
    # explicitly instead of failing on the subscript below.
    if row is None:
        raise RuntimeError("No run found in the RUN table of the benchmark database")
    return(int(row[0]))


# Default run id; it is replaced below once the arguments are parsed
runid = 1

parser = argparse.ArgumentParser(description='Generate summary benchmarks')

parser.add_argument(
    '-b',
    nargs='?',
    type=str,
    default="bench.db",
    help="Benchmark database",
)
parser.add_argument(
    '-o',
    nargs='?',
    type=str,
    default="full.md",
    help="Full summary",
)
parser.add_argument(
    '-r',
    action='store_true',
    help="Regression database",
)

# Remaining arguments: meant for a runid or a runid range.
# Only the first value is read below, as the runid.
parser.add_argument('others', nargs=argparse.REMAINDER)

args = parser.parse_args()

# Connection to the benchmark database, used by the query functions
c = sqlite3.connect(args.b)

# Run id from the command line when given, otherwise the last one of the database
runid = int(args.others[0]) if args.others else getLastRunID()

# Data is extracted only from the data tables.
# The tables listed here only hold descriptions and are skipped.
REMOVETABLES = [
    'RUN',
    'CORE',
    'PLATFORM',
    'COMPILERKIND',
    'COMPILER',
    'TYPE',
    'CATEGORY',
    'CONFIG',
]

# Columns left out of the report.
# This assumes the database is generated by the regression script,
# so the platform is the same for all benchmarks.
# Category and type come from the test name in the yaml,
# so there is no need to add this information here.
# NAME is removed here because it is added back as the first column.
REMOVECOLUMNS = [
    'runid',
    'NAME',
    'type',
    'platform',
    'category',
    'coredef',
    'OPTIMIZED',
    'HARDFP',
    'FASTMATH',
    'NEON',
    'HELIUM',
    'UNROLL',
    'ROUNDING',
    'DATE',
    'compilerkindid',
    'date',
    'categoryid',
    'ID',
    'platformid',
    'coreid',
    'compilerid',
    'typeid',
]

# List the benchmark (data) tables of the database
def getBenchTables():
    """Return the names of the tables of the database not listed in REMOVETABLES.

    The names are read from sqlite_master through the module-level
    connection ``c`` and returned in query order.
    """
    tableRows = c.execute("SELECT name FROM sqlite_master WHERE type='table'")
    return [row[0] for row in tableRows if row[0] not in REMOVETABLES]

# List the type ids used in a benchmark table
def getExistingTypes(benchTable):
    """Return the list of distinct typeid values found in benchTable."""
    query = "select distinct typeid from %s" % benchTable
    typeRows = c.execute(query).fetchall()
    return [row[0] for row in typeRows]

# SQL template listing the distinct (compiler, version) pairs found
# in a benchmark table for a specific type.
# The %s is replaced by the benchmark table name and
# the ? placeholder is bound to the typeid.
versioncompiler="""select distinct compiler,version from %s 
  INNER JOIN COMPILER USING(compilerid)
  INNER JOIN COMPILERKIND USING(compilerkindid) WHERE typeid=?"""

# List the compilers present in a table for a specific type
# (in case the report is structured by types)
def getExistingCompiler(benchTable,typeid):
    """Return the rows of the versioncompiler query for benchTable and typeid.

    Each row is a (compiler, version) tuple.
    """
    query = versioncompiler % benchTable
    cursor = c.execute(query, (typeid,))
    return cursor.fetchall()

# Translate a type id into a type name
def getTypeName(typeid):
    """Return the `type` column of the TYPE row whose typeid matches."""
    typeRow = c.execute("select type from TYPE where typeid=?", (typeid,)).fetchone()
    typeName = typeRow[0]
    return typeName
 
# Difference of 2 lists
def diff(first, second):
    """Return the items of first that are not in second.

    The order of first is kept, and so are its duplicates.
    """
    excluded = set(second)
    kept = []
    for element in first:
        if element in excluded:
            continue
        kept.append(element)
    return kept


# SQL template fetching the data of a benchmark table
# for a specific compiler, version, type and run.
# First %s  : comma separated list of the columns to select
# Second %s : name of the benchmark table
# The ? placeholders are bound to compiler, version, typeid and runid.
benchCmd="""select %s from %s
  INNER JOIN CATEGORY USING(categoryid)
  INNER JOIN PLATFORM USING(platformid)
  INNER JOIN CORE USING(coreid)
  INNER JOIN COMPILER USING(compilerid)
  INNER JOIN COMPILERKIND USING(compilerkindid)
  INNER JOIN TYPE USING(typeid)
  WHERE compiler=? AND VERSION=? AND typeid = ? AND runid = ?
  """

# SQL template counting the rows of a benchmark table
# for a specific compiler, version, type and run.
# The %s is replaced by the name of the benchmark table.
nbElemsCmd="""select count(*) from %s
  INNER JOIN CATEGORY USING(categoryid)
  INNER JOIN PLATFORM USING(platformid)
  INNER JOIN CORE USING(coreid)
  INNER JOIN COMPILER USING(compilerid)
  INNER JOIN COMPILERKIND USING(compilerkindid)
  INNER JOIN TYPE USING(typeid)
  WHERE compiler=? AND VERSION=? AND typeid = ? AND runid = ?
  """

# SQL template listing the distinct test names of a benchmark table
# for a specific compiler, version, type and run.
# The %s is replaced by the name of the benchmark table.
benchNames="""select distinct NAME from %s
  INNER JOIN COMPILER USING(compilerid)
  INNER JOIN COMPILERKIND USING(compilerkindid)
  INNER JOIN TYPE USING(typeid)
  WHERE compiler=? AND VERSION=? AND typeid = ? AND runid = ?
  """

# SQL template selecting all the columns of a benchmark table
# joined with the description tables, without any filter.
# getColNamesAndData executes it to read the column names
# from the cursor description.
benchCmdColumns="""select * from %s
  INNER JOIN CATEGORY USING(categoryid)
  INNER JOIN PLATFORM USING(platformid)
  INNER JOIN CORE USING(coreid)
  INNER JOIN COMPILER USING(compilerid)
  INNER JOIN COMPILERKIND USING(compilerkindid)
  INNER JOIN TYPE USING(typeid)
  """

def joinit(iterable, delimiter):
    """Yield the items of iterable with delimiter inserted between them.

    Nothing is yielded for an empty iterable.
    """
    # A bare next() on an empty iterator would raise StopIteration inside
    # the generator, which Python 3.7+ converts into a RuntimeError.
    # A flag avoids the explicit next() call.
    needsDelimiter = False
    for item in iterable:
        if needsDelimiter:
            yield delimiter
        yield item
        needsDelimiter = True

# True when the column name does not finish by id
# (such columns are often the primary key of the table)
def isNotIDColumn(col):
    """Return False when col matches the pattern ^.*id$ and True otherwise.

    The match is case sensitive.
    """
    idMatch = re.match(r'^.*id$', col)
    return idMatch is None
    
# List the test names of a table
# for a specific typeid and compiler
def getTestNames(benchTable,comp,typeid):
    """Return the distinct NAME values of benchTable for the current run.

    comp[0] is bound to the compiler and comp[1] to the version.
    The run is the module-level runid.
    """
    query = benchNames % benchTable
    queryArgs = (comp[0], comp[1], typeid, runid)
    names = []
    for row in c.execute(query, queryArgs).fetchall():
        names.append(row[0])
    return names

# Count the rows of a table
# for a specific typeid and compiler
def getNbElems(benchTable,comp,typeid):
    """Return the count(*) of the nbElemsCmd query for the current run.

    comp[0] is bound to the compiler and comp[1] to the version.
    The run is the module-level runid.
    """
    query = nbElemsCmd % benchTable
    queryArgs = (comp[0], comp[1], typeid, runid)
    countRow = c.execute(query, queryArgs).fetchone()
    return countRow[0]

# Column names and data of a table
# for a specific typeid and compiler
def getColNamesAndData(benchTable,comp,typeid):
    """Return (keepCols, vals) for benchTable, the compiler and the type.

    keepCols is 'NAME' followed by the columns of the joined tables
    that are neither in REMOVECOLUMNS nor finishing by id.
    vals is a numpy array built from the rows of the benchCmd query,
    restricted to keepCols and to the module-level runid.
    """
    cursor = c.cursor()
    # This unfiltered query is only executed to get the
    # column names from the cursor description
    cursor.execute(benchCmdColumns % (benchTable))
    allColumns = [description[0] for description in cursor.description]

    keepCols = ['NAME']
    for columnName in diff(allColumns, REMOVECOLUMNS):
        if isNotIDColumn(columnName):
            keepCols.append(columnName)

    # keepCols always contains at least 'NAME'
    selectedColumns = ",".join(keepCols)
    queryArgs = (comp[0], comp[1], typeid, runid)
    rows = cursor.execute(benchCmd % (selectedColumns, benchTable), queryArgs)
    data = np.array([list(row) for row in rows])
    return (keepCols, data)

# Write columns in markdown format
def writeColumns(f,cols):
    """Write the header row and the alignment row of a markdown table.

    Args:
        f: object with a write method accepting strings.
        cols: iterable of column labels. Each label is converted with
            str(), as writeRow does for the cells of a row.
    """
    labels = [str(col) for col in cols]
    f.write("|" + "|".join(labels) + "|\n")
    # One centered alignment marker per column
    f.write("|" + "|".join(":-:" for _ in labels) + "|\n")

# Write row in markdown format
def writeRow(f,row):
    """Write one row of a markdown table.

    Args:
        f: object with a write method accepting strings.
        row: iterable of cells. Each cell is converted with str().
            An empty row is written as "||".
    """
    cells = [str(cell) for cell in row]
    f.write("|" + "|".join(cells) + "|\n")

# Parameter columns: when one of them is a column of the data,
# formatTableByCore converts it to numeric and sorts on it
PARAMS = [
    "NB",
    "NumTaps",
    "NBA",
    "NBB",
    "Factor",
    "NumStages",
    "VECDIM",
    "NBR",
    "NBC",
    "NBI",
    "IFFT",
    "BITREV",
]

def regressionTableFor(name,output,ref,toSort,indexCols,field):
    """Write the markdown table of one field for one test, one column per core.

    Args:
        name: test name, looked up with data.loc[name] in the pivoted data
            (assumes NAME is the first entry of indexCols - TODO confirm).
        output: object with a write method accepting strings.
        ref: pandas DataFrame containing indexCols, 'core' and field.
        toSort: names used to sort the pivoted data.
        indexCols: columns used as index of the pivot table.
        field: name of the column whose values fill the table.
    """
    data = ref.pivot_table(index=indexCols, columns='core',
                           values=[field], aggfunc='first')

    data = data.sort_values(toSort)

    # Each pivot column label is a pair whose second item is the core
    cores = [column[1] for column in data.columns]
    columns = diff(indexCols, ['NAME']) + cores

    writeColumns(output, columns)
    dataForFunc = data.loc[name]
    if isinstance(dataForFunc, pd.DataFrame):
        for row in dataForFunc.itertuples():
            row = list(row)
            indexValue = row[0]
            # Only a multi-level index gives a tuple that must be expanded
            # into one cell per level. Any scalar (int, float, string...)
            # is a single cell and must not be iterated.
            if isinstance(indexValue, tuple):
                row = list(indexValue) + row[1:]
            writeRow(output, row)
    else:
        # Single row for this test
        writeRow(output, dataForFunc)

def formatTableByCore(output,testNames,cols,vals):
    """Write, for each test name, the markdown tables comparing the cores.

    Nothing is written when vals is empty. When the -r option is set
    (args.r), three tables are written per test: Regression, MAX and
    MAXREGCOEF. Otherwise one table with the CYCLES is written.

    Args:
        output: object with a write method accepting strings.
        testNames: names of the tests to report.
        cols: column names of vals.
        vals: numpy array with one row per measurement.
    """
    if vals.size == 0:
        return

    ref = pd.DataFrame(vals, columns=cols)
    toSort = ["NAME"]

    # Parameter columns are converted to numbers and used as sort keys
    for param in PARAMS:
        if param in ref.columns:
            ref[param] = pd.to_numeric(ref[param])
            toSort.append(param)

    if args.r:
        #  Regression table
        ref['MAX'] = pd.to_numeric(ref['MAX'])
        ref['MAXREGCOEF'] = pd.to_numeric(ref['MAXREGCOEF'])
        indexCols = diff(cols, ['core','Regression','MAXREGCOEF','MAX','version','compiler'])
    else:
        ref['CYCLES'] = pd.to_numeric(ref['CYCLES'])
        indexCols = diff(cols, ['core','CYCLES','version','compiler'])

    for name in testNames:
        output.write("#### %s\n" % name)
        if args.r:
            output.write("##### Regression\n" )
            regressionTableFor(name, output, ref, toSort, indexCols, 'Regression')

            output.write("##### Max cycles\n" )
            regressionTableFor(name, output, ref, toSort, indexCols, 'MAX')

            output.write("##### Max Reg Coef\n" )
            regressionTableFor(name, output, ref, toSort, indexCols, 'MAXREGCOEF')
        else:
            # Same table layout as the regression tables, with the cycles as values
            regressionTableFor(name, output, ref, toSort, indexCols, 'CYCLES')

# Write the report of one benchmark table
def addReportFor(output,benchName):
    """Write the section of benchName: one part per type, then per compiler.

    A compiler is reported only when it has rows for the type
    (getNbElems greater than 0).
    """
    print("Process %s\n" % benchName)
    output.write("# %s\n" % benchName)
    # One sub-section per type
    for typeID in getExistingTypes(benchName):
        output.write("## %s\n" % getTypeName(typeID))
        # One sub-section per compiler having results
        for compiler in getExistingCompiler(benchName, typeID):
            if getNbElems(benchName, compiler, typeID) <= 0:
                continue
            # compiler is a (compiler, version) row
            output.write("### %s (%s)\n" % compiler)
            cols, vals = getColNamesAndData(benchName, compiler, typeID)
            names = getTestNames(benchName, compiler, typeID)
            formatTableByCore(output, names, cols, vals)
           

# Generate the report: one section per benchmark table.
# The database connection is closed even when the generation fails.
try:
    with open(args.o, "w") as output:
        benchtables = getBenchTables()
        for bench in benchtables:
            addReportFor(output, bench)
finally:
    c.close()