#!/usr/bin/python
""" 
by G.Landais (21-apr-2016)
get ATNF (pulsar catalogue)
NOTE: the URL returns a CSV embedded in HTML

Usage: pgm [-v version] [-c] [-i] [-h]
       -c : create the ReadMe from empty, i.e. do not use template
            (asks for confirmation)
       -i : not interactive

1- get data from URL
2- generate CSV from html
3- generate ASCII from CSV

Note: requires pyreadme/cds library (PYTHONPATH=/data/too/pyreadme/)
EXIT 0: nothing new
1: error
2: updated
"""

import re
import sys
import urllib
import time as tm
import getopt

from cds.core import *
import catb

# Query sent to the ATNF psrcat form. The literal "VERSION" in the query
# string is a placeholder: the main section replaces it with the value of the
# VERSION constant before processing. The query asks for the
# "Long csv with errors" output style.
URL="http://www.atnf.csiro.au/people/pulsar/psrcat/proc_form.php?version=VERSION&table_top.x=46&table_top.y=11&Name=Name&JName=JName&RaJ=RaJ&DecJ=DecJ&PMRA=PMRA&PMDec=PMDec&PX=PX&PosEpoch=PosEpoch&GL=GL&GB=GB&RaJD=RaJD&DecJD=DecJD&P0=P0&P1=P1&F0=F0&F1=F1&F2=F2&F3=F3&PEpoch=PEpoch&DM=DM&DM1=DM1&RM=RM&W50=W50&W10=W10&Units=Units&Tau_sc=Tau_sc&S400=S400&S1400=S1400&S2000=S2000&Binary=Binary&T0=T0&PB=PB&A1=A1&OM=OM&Ecc=Ecc&Tasc=Tasc&Eps1=Eps1&Eps2=Eps2&Minmass=Minmass&Medmass=Medmass&Bincomp=Bincomp&Dist=Dist&Dist_DM=Dist_DM&DMsinb=DMsinb&ZZ=ZZ&XX=XX&YY=YY&Assoc=Assoc&Survey=Survey&OSurvey=OSurvey&Date=Date&Type=Type&NGlt=NGlt&R_lum=R_lum&R_lum14=R_lum14&Age=Age&Bsurf=Bsurf&Edot=Edot&Edotd2=Edotd2&PMtot=PMtot&VTrans=VTrans&P1_i=P1_i&Age_i=Age_i&Bsurf_i=Bsurf_i&B_LC=B_LC&startUserDefined=true&c1_val=&c2_val=&c3_val=&c4_val=&sort_attr=jname&sort_order=asc&condition=&pulsar_names=&ephemeris=short&coords_unit=raj%2Fdecj&radius=&coords_1=&coords_2=&style=Long+csv+with+errors&no_value=&fsize=3&x_axis=&x_scale=linear&y_axis=&y_scale=linear&state=query"
# Base name of the pulsar table files (<NAME>.html, <NAME>.csv, <NAME>.dat)
NAME="psr"
# Catalogue version substituted into URL; can be given with the -v option
VERSION="latest"
# ReadMe template handed to the catalogue factory in the main section
READMETEMPLATE="/home/cats/B/psr/ReadMe.template"

# Page listing the bibliographic references; downloaded by getReference()
URL_REF="http://www.atnf.csiro.au/people/pulsar/psrcat/psrcat_ref.html"
# Base name of the reference table files (<REFNAME>.html/.tsv/.dat)
REFNAME="reference"

def getPSRFile(filenames):
    """Convert the ATNF pulsar page into a tab-separated file.

    Reads ``filenames[0] + ".html"`` and writes ``filenames[0] + ".csv"``.
    The input holds a ';'-separated table: every line starting with '#' is a
    header, the following lines are records, and a line starting with
    ``</pre`` ends the table.  Lines found before the first header are
    ignored.  In the records every '*' is replaced by 'nan'.

    The output is tab-separated; its header line starts with '#'.  The first
    column is always written, the other ones only when the header parsing
    gave them a name (see the comments in the code).

    NOTE(review): the download of URL is disabled (commented out below), so
    the .html file must already exist when this function is called.

    filenames -- [pulsar table base name, reference table base name]

    Returns [csv file name, reference file name]; the second one is the
    result of getReference(filenames[1]).
    Raises Exception("wrong number of columns") when a record does not have
    as many fields as the header.
    """
    out = filenames[0]+".html"
    catb.debug("GET data ("+out+") "+URL)
    # Download step, disabled in the original code and kept for reference:
    #    fd = urllib.urlopen(URL)
    #    fout = open(out, "w")
    #    for line in fd:
    #        fout.write(line)
    #    fout.close()
    #    fd.close()
    outcsv = out.replace(".html", ".csv")
    catb.debug("create CSV ("+out+"->"+outcsv+")")
    begin = False
    columns = []
    # 'with' guarantees both files are closed, even when a malformed record
    # raises in the middle of the conversion.
    with open(out) as fin, open(outcsv, "w") as fout:
        for line in fin:
            if line[0] == '#':
                begin = True
                col = line.strip().replace("#", "").split(";")
                # column is divided in 2 (col+ref) or 3 (col+error+ref) sub columns
                # An empty header cell is a sub column of the last named
                # column; it stays unnamed (None, i.e. dropped from the
                # output) unless it follows RAJ, where it is named "ref".
                nbsubcol = 0
                lastcname = None
                for c in col:
                    if c == "":
                        nbsubcol += 1
                        if lastcname == "RAJ":
                            columns.append("ref")
                        else:
                            columns.append(None)
                    else:
                        if nbsubcol == 1:
                            # a single empty cell before the very first name:
                            # that leading column is named "Seq"
                            if len(columns) == 1:
                                columns[0] = "Seq"
                        elif nbsubcol == 2:
                            # two sub columns: the first one is the error of
                            # the previous named column
                            columns[-2] = "e_"+lastcname

                        nbsubcol = 0
                        lastcname = c
                        columns.append(c)
                catb.debug(columns)

                kept = [name for name in columns[1:] if name is not None]
                fout.write("\t".join(["#"+columns[0]]+kept)+"\n")

            elif line.startswith("</pre"):
                break
            elif begin:
                # Drop the line terminator first: otherwise it stays glued to
                # the last field and, when that field is written, each record
                # is followed by a blank line.
                record = line.rstrip("\r\n").replace("*", "nan").split(";")
                if len(record) != len(columns):
                    raise Exception("wrong number of columns")
                fields = [record[0]]
                for i in range(1, len(columns)):
                    if columns[i]:
                        fields.append(record[i])
                fout.write("\t".join(fields)+"\n")

    return [outcsv, getReference(filenames[1])]

def getReference(filename):
    """Download the ATNF reference page and convert it into a TSV file.

    The page at URL_REF is saved as ``filename + ".html"`` with every line
    stripped and no line separator, so that the whole document sits on one
    line.  That line is then reduced with regular expressions: everything
    outside the first table found after the "box" <div> is removed, each
    </td> becomes a tab, each </tr> a newline, and the remaining tags are
    deleted.  The result is written to ``filename + ".tsv"``.

    filename -- base name (without extension) of the files to create

    Returns the name of the TSV file.
    """
    out = filename+".html"
    catb.debug("GET data ("+out+") "+URL_REF)
    fd = urllib.urlopen(URL_REF)
    try:
        with open(out, "w") as fout:
            for line in fd:
                fout.write(line.strip())
    finally:
        # release the connection even if writing the local copy fails
        fd.close()

    # HTML to TSV
    with open(out, "r") as fin:
        html = fin.readline()

    # Applied in this order; the first three rely on the document being a
    # single line ('.' does not match a newline).
    substitutions = (
        (r"^.*<div *[^>]*box[^>]*>", r""),   # drop everything up to the "box" div
        (r"</table>.*$", r""),               # drop everything from the first </table>
        (r"^.*<table[^>]*>", r""),           # drop everything up to the <table> tag
        (r"<tr[^>]*>", r""),
        (r"<td[^>]*>", r""),
        (r":* *</td>", r"\t"),               # end of cell -> tab (trailing ':' removed)
        (r"\t*</tr>", r"\n"),                # end of row -> newline
        (r"<[^>]*>", r""),                   # remove any remaining tag
    )
    for pattern, replacement in substitutions:
        html = re.sub(pattern, replacement, html)

    outfilename = filename+".tsv"
    with open(outfilename, "w") as fout:
        fout.write(html)
    return outfilename

def PSRtoascii(tablemaker, filenames):
    """Register the pulsar and reference tables and write them as ASCII.

    The tab-separated file ``filenames[0]`` is wrapped in a CDSFileTable and
    added to *tablemaker*.  Its first line (column names, prefixed by '#')
    and second line (units) are used to set the name and, when not blank,
    the unit of every column of the table.  Columns 3 and 6 are flagged as
    nullable sexagesimal right ascension and declination.  The reference
    table ``filenames[1]`` is added through ref2ascii(), then
    tablemaker.writeCDSTables() is called.

    tablemaker -- object providing addTable() and writeCDSTables()
    filenames  -- [pulsar file (.csv), reference file (.tsv)]

    Returns [pulsar .dat file name, reference .dat file name].
    """
    filein = filenames[0]
    fileout = filein.replace(".csv", ".dat")
    catb.debug("create VizieR Ascii table ("+filein+"->"+fileout+")")
    cdstable = CDSFileTable(filein, name=fileout, description="ATNF Pulsar Catalogue", data_start=1)

    tablemaker.addTable(cdstable, nullvalue='nan')

    # get header
    # The line terminator is removed before splitting: otherwise the last
    # column name ends with "\n" and a last unit made only of "\n" is taken
    # for a real unit.
    with open(filein) as fd:
        columns = fd.readline().rstrip("\r\n").replace("#", "").split("\t")
        units = fd.readline().rstrip("\r\n").split("\t")

    for i, col in enumerate(cdstable.columns):
        if len(units[i].strip(' ')) > 0:
            col.unit = units[i]
        col.name = columns[i]

    # sexagesimal columns
    cdstable.columns[3].hasNull = True
    cdstable.columns[3].setSexaRa()
    cdstable.columns[6].hasNull = True
    cdstable.columns[6].setSexaDe()

    refout = ref2ascii(tablemaker, filenames[1])
    tablemaker.writeCDSTables()
    return [fileout, refout]

def ref2ascii(tablemaker, filename):
    """Register the reference TSV file as a table of *tablemaker*.

    The output name is *filename* with ".tsv" replaced by ".dat".  The two
    first columns of the table are named "Ref" and "Authors" and receive a
    description.

    tablemaker -- object providing addTable()
    filename   -- reference file (.tsv)

    Returns the name of the .dat file.
    """
    target = filename.replace(".tsv", ".dat")
    catb.debug("create VizieR Ascii table ("+filename+"->"+target+")")
    reftable = CDSFileTable(filename, name=target, description="ATNF References")
    tablemaker.addTable(reftable, nullvalue='nan')

    # (column index, attribute, value), applied in this order
    settings = (
        (0, "name", "Ref"),
        (0, "description", "Reference codes"),
        (1, "name", "Authors"),
        (1, "description", "Authors and contributors"),
    )
    for position, attribute, value in settings:
        setattr(reftable.columns[position], attribute, value)
    return target


if __name__ == "__main__":
    # Command line entry point: parse the options, optionally ask for
    # confirmation, then run the catalogue factory on the pulsar and
    # reference tables and record the number of records.
    interactive = True
    createReadMe = catb.default_makeReadMeFunction

    try:
        __opts, __args = getopt.getopt(sys.argv[1:], "hv:ci", ["help"])
    except getopt.GetoptError as err:
        # tell the user which option was rejected before showing the usage
        sys.stderr.write(str(err)+"\n")
        help("__main__")
        sys.exit(1)
    for __o, __a in __opts:
        if __o in ("-h", "--help"):
            help("__main__")
            sys.exit(1)
        elif __o == "-v":
            # assignment, not comparison: the original "VERSION == __a"
            # silently ignored the requested version
            VERSION = __a
        elif __o == "-c":
            if catb.confirm("create ReadMe from empty ? (this would delete previous definitions)") is False:
                sys.exit(0)
            createReadMe = catb.default_makeEmptyReadMeFunction
        elif __o == "-i":
            interactive = False

    if interactive is True:
        if catb.confirm("get ATNF version "+str(VERSION)) is False:
            sys.exit(1)

    # substitute the placeholder of the query string with the version
    URL = URL.replace("VERSION", VERSION)
    factory = catb.BCatalogFactory(getPSRFile, PSRtoascii, createReadMe)
    factory.setReadMeTemplate(READMETEMPLATE)
    factory.save(["ReadMe", ".Summary", NAME+".dat", REFNAME+".dat"])
    factory.process([NAME, REFNAME])

    statusManager = catb.StatusManager()
    statusManager.updateRecords(str(factory.getnrecords()))
